agcodex_core/tools/
grep_simple.rs

1//! Simplified AST-grep based semantic search tool for AGCodex.
2//!
3//! This is a simplified implementation that uses text-based pattern matching
4//! instead of the complex ast-grep API to avoid compilation issues.
5
6use super::Change;
7use super::ChangeKind;
8use super::ComprehensiveSemanticImpact;
9use super::ComprehensiveToolOutput;
10use super::ContextSnapshot;
11use super::OperationContext;
12use super::OperationMetadata;
13use super::OperationScope;
14use super::PerformanceMetrics;
15use super::ScopeType;
16// Import SourceLocation from ast module
17use agcodex_ast::SourceLocation;
18use dashmap::DashMap;
19use serde::Deserialize;
20use serde::Serialize;
21use std::path::Path;
22use std::path::PathBuf;
23use std::sync::Arc;
24use std::time::Duration;
25use std::time::Instant;
26use thiserror::Error;
27use tracing::error;
28use uuid::Uuid;
29
30/// Errors specific to grep operations
31#[derive(Debug, Error)]
32pub enum GrepError {
33    #[error("invalid pattern: {pattern} - {reason}")]
34    InvalidPattern { pattern: String, reason: String },
35
36    #[error("unsupported language: {language}")]
37    UnsupportedLanguage { language: String },
38
39    #[error("query compilation failed: {query} - {reason}")]
40    QueryCompilationFailed { query: String, reason: String },
41
42    #[error("YAML rule parsing failed: {rule} - {reason}")]
43    YamlRuleFailed { rule: String, reason: String },
44
45    #[error("search timeout after {duration:?}")]
46    SearchTimeout { duration: Duration },
47
48    #[error("file access error: {path} - {reason}")]
49    FileAccess { path: PathBuf, reason: String },
50
51    #[error("parse error for {path}: {reason}")]
52    ParseError { path: PathBuf, reason: String },
53
54    #[error("pattern cache overflow: {current_size} >= {max_size}")]
55    CacheOverflow {
56        current_size: usize,
57        max_size: usize,
58    },
59
60    #[error("performance threshold exceeded: {actual_ms}ms > {threshold_ms}ms")]
61    PerformanceThreshold { actual_ms: u64, threshold_ms: u64 },
62
63    #[error(transparent)]
64    Io(#[from] std::io::Error),
65}
66
67/// Result type for grep operations
68pub type GrepResult<T> = std::result::Result<T, GrepError>;
69
70/// Type of pattern/rule being used
71#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
72pub enum RuleType {
73    /// Simple string pattern
74    Pattern,
75    /// Tree-sitter query
76    Query,
77    /// YAML rule configuration
78    YamlRule,
79}
80
81/// Supported programming languages
82#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
83pub enum SupportedLanguage {
84    Rust,
85    Python,
86    JavaScript,
87    TypeScript,
88    Go,
89    Java,
90    C,
91    Cpp,
92    CSharp,
93    Html,
94    Css,
95    Json,
96    Yaml,
97    Toml,
98    Bash,
99    Ruby,
100    Php,
101    Haskell,
102    Elixir,
103    Swift,
104    Kotlin,
105    Sql,
106    Dockerfile,
107    Markdown,
108}
109
110impl SupportedLanguage {
111    pub const fn as_str(&self) -> &str {
112        match self {
113            Self::Rust => "rust",
114            Self::Python => "python",
115            Self::JavaScript => "javascript",
116            Self::TypeScript => "typescript",
117            Self::Go => "go",
118            Self::Java => "java",
119            Self::C => "c",
120            Self::Cpp => "cpp",
121            Self::CSharp => "csharp",
122            Self::Html => "html",
123            Self::Css => "css",
124            Self::Json => "json",
125            Self::Yaml => "yaml",
126            Self::Toml => "toml",
127            Self::Bash => "bash",
128            Self::Ruby => "ruby",
129            Self::Php => "php",
130            Self::Haskell => "haskell",
131            Self::Elixir => "elixir",
132            Self::Swift => "swift",
133            Self::Kotlin => "kotlin",
134            Self::Sql => "sql",
135            Self::Dockerfile => "dockerfile",
136            Self::Markdown => "markdown",
137        }
138    }
139}
140
141/// A match found during grep search
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct GrepMatch {
144    pub file: PathBuf,
145    pub line: usize,
146    pub column: usize,
147    pub end_line: usize,
148    pub end_column: usize,
149    pub matched_text: String,
150    pub context_before: Vec<String>,
151    pub context_after: Vec<String>,
152    pub confidence: f32,
153    pub byte_offset: usize,
154}
155
156/// Search query for grep operations
157#[derive(Debug, Clone)]
158pub struct GrepQuery {
159    pub pattern: String,
160    pub paths: Vec<PathBuf>,
161    pub language: Option<SupportedLanguage>,
162    pub rule_type: RuleType,
163    pub max_results: Option<usize>,
164    pub include_hidden: bool,
165    pub follow_symlinks: bool,
166    pub case_sensitive: bool,
167    pub whole_word: bool,
168    pub context_lines: usize,
169}
170
171impl Default for GrepQuery {
172    fn default() -> Self {
173        Self {
174            pattern: String::new(),
175            paths: Vec::new(),
176            language: None,
177            rule_type: RuleType::Pattern,
178            max_results: Some(1000),
179            include_hidden: false,
180            follow_symlinks: false,
181            case_sensitive: true,
182            whole_word: false,
183            context_lines: 3,
184        }
185    }
186}
187
/// Tunable limits for grep operations.
#[derive(Debug, Clone)]
pub struct GrepConfig {
    /// Largest file size, in bytes.
    pub max_file_size: usize,
    /// Size limit for the pattern cache.
    pub max_pattern_cache_size: usize,
    /// Threshold related to parallel execution (NOTE(review): units are not
    /// established by the code in view; presumably a file count).
    pub parallel_threshold: usize,
    /// Time limit for a search.
    pub timeout: Duration,
    /// Performance threshold, in milliseconds.
    pub performance_threshold_ms: u64,
}

impl Default for GrepConfig {
    /// Defaults: 10 MiB file size, 1000 cache entries, parallel threshold of
    /// 10, 30 s timeout, and a 5000 ms performance threshold.
    fn default() -> Self {
        const TEN_MEBIBYTES: usize = 10 * 1024 * 1024;

        let timeout = Duration::from_secs(30);
        Self {
            max_file_size: TEN_MEBIBYTES,
            max_pattern_cache_size: 1000,
            parallel_threshold: 10,
            timeout,
            performance_threshold_ms: 5000,
        }
    }
}
209
210/// Simplified grep engine
211pub struct SimpleGrepEngine {
212    _config: GrepConfig,
213    _pattern_cache: Arc<DashMap<String, Vec<GrepMatch>>>,
214}
215
216impl SimpleGrepEngine {
217    pub fn new(config: GrepConfig) -> Self {
218        Self {
219            _config: config,
220            _pattern_cache: Arc::new(DashMap::new()),
221        }
222    }
223
224    /// Search files with pattern
225    pub fn search_files(&self, query: &GrepQuery) -> GrepResult<Vec<GrepMatch>> {
226        let mut all_matches = Vec::new();
227
228        for path in &query.paths {
229            if path.is_file() {
230                let matches = self.search_file(path, &query.pattern, query)?;
231                all_matches.extend(matches);
232            } else if path.is_dir() {
233                let matches = self.search_directory(path, &query.pattern, query)?;
234                all_matches.extend(matches);
235            }
236
237            // Check max results
238            if let Some(max) = query.max_results
239                && all_matches.len() >= max
240            {
241                all_matches.truncate(max);
242                break;
243            }
244        }
245
246        Ok(all_matches)
247    }
248
249    /// Search a single file
250    fn search_file(
251        &self,
252        path: &Path,
253        pattern: &str,
254        query: &GrepQuery,
255    ) -> GrepResult<Vec<GrepMatch>> {
256        let content = std::fs::read_to_string(path)?;
257        let lines: Vec<&str> = content.lines().collect();
258        let mut matches = Vec::new();
259
260        for (line_idx, line) in lines.iter().enumerate() {
261            if self.line_matches(line, pattern, query) {
262                let line_num = line_idx + 1;
263
264                // Get context
265                let context_before = if line_idx > 0 {
266                    let start = line_idx.saturating_sub(query.context_lines);
267                    lines[start..line_idx]
268                        .iter()
269                        .map(|s| (*s).to_string())
270                        .collect()
271                } else {
272                    Vec::new()
273                };
274
275                let context_after = if line_idx < lines.len() - 1 {
276                    let end = std::cmp::min(line_idx + 1 + query.context_lines, lines.len());
277                    lines[line_idx + 1..end]
278                        .iter()
279                        .map(|s| (*s).to_string())
280                        .collect()
281                } else {
282                    Vec::new()
283                };
284
285                matches.push(GrepMatch {
286                    file: path.to_path_buf(),
287                    line: line_num,
288                    column: 1,
289                    end_line: line_num,
290                    end_column: line.len(),
291                    matched_text: (*line).to_string(),
292                    context_before,
293                    context_after,
294                    confidence: 1.0,
295                    byte_offset: 0, // Simplified
296                });
297            }
298        }
299
300        Ok(matches)
301    }
302
303    /// Search a directory recursively
304    fn search_directory(
305        &self,
306        dir: &Path,
307        pattern: &str,
308        query: &GrepQuery,
309    ) -> GrepResult<Vec<GrepMatch>> {
310        let mut all_matches = Vec::new();
311
312        for entry in std::fs::read_dir(dir)? {
313            let entry = entry?;
314            let path = entry.path();
315
316            if path.is_file() {
317                if let Ok(matches) = self.search_file(&path, pattern, query) {
318                    all_matches.extend(matches);
319                }
320            } else if path.is_dir()
321                && !path
322                    .file_name()
323                    .unwrap_or_default()
324                    .to_string_lossy()
325                    .starts_with('.')
326                && let Ok(matches) = self.search_directory(&path, pattern, query)
327            {
328                all_matches.extend(matches);
329            }
330        }
331
332        Ok(all_matches)
333    }
334
335    /// Check if a line matches the pattern
336    fn line_matches(&self, line: &str, pattern: &str, query: &GrepQuery) -> bool {
337        if query.case_sensitive {
338            line.contains(pattern)
339        } else {
340            line.to_lowercase().contains(&pattern.to_lowercase())
341        }
342    }
343}
344
345/// Main grep tool
346pub struct GrepTool {
347    engine: SimpleGrepEngine,
348}
349
350impl GrepTool {
351    pub fn new(config: GrepConfig) -> Self {
352        Self {
353            engine: SimpleGrepEngine::new(config),
354        }
355    }
356
357    /// Execute search with full query object
358    pub fn search_with_query(
359        &self,
360        query: GrepQuery,
361    ) -> GrepResult<ComprehensiveToolOutput<Vec<GrepMatch>>> {
362        let start = Instant::now();
363
364        // Perform search
365        let matches = self.engine.search_files(&query)?;
366
367        let duration = start.elapsed();
368
369        // Build comprehensive output
370        let first_path = query
371            .paths
372            .first()
373            .cloned()
374            .unwrap_or_else(|| PathBuf::from("unknown"));
375        let context = OperationContext {
376            before: ContextSnapshot {
377                content: format!("Searching for pattern: {}", query.pattern),
378                timestamp: std::time::SystemTime::now(),
379                content_hash: format!("{:x}", md5::compute(&query.pattern)),
380                ast_summary: None,
381                symbols: Vec::new(),
382            },
383            after: None,
384            surrounding: Vec::new(),
385            location: SourceLocation {
386                file_path: first_path.to_string_lossy().to_string(),
387                start_line: 0,
388                start_column: 0,
389                end_line: 0,
390                end_column: 0,
391                byte_range: (0, 0),
392            },
393            scope: OperationScope {
394                scope_type: ScopeType::File,
395                name: "search".to_string(),
396                path: vec!["grep".to_string()],
397                file_path: first_path.clone(),
398                line_range: 0..0,
399            },
400            language_context: None,
401            project_context: None,
402        };
403
404        let changes = matches
405            .iter()
406            .map(|m| Change {
407                id: Uuid::new_v4(),
408                kind: ChangeKind::Added {
409                    reason: format!("Pattern match found for '{}'", query.pattern),
410                    insertion_point: SourceLocation {
411                        file_path: m.file.to_string_lossy().to_string(),
412                        start_line: m.line,
413                        start_column: m.column,
414                        end_line: m.line,
415                        end_column: m.column + m.matched_text.len(),
416                        byte_range: (m.byte_offset, m.byte_offset + m.matched_text.len()),
417                    },
418                },
419                old: None,
420                new: Some(m.matched_text.clone()),
421                line_range: m.line..m.line + 1,
422                char_range: m.column..m.column + m.matched_text.len(),
423                location: SourceLocation {
424                    file_path: m.file.to_string_lossy().to_string(),
425                    start_line: m.line,
426                    start_column: m.column,
427                    end_line: m.line,
428                    end_column: m.column + m.matched_text.len(),
429                    byte_range: (m.byte_offset, m.byte_offset + m.matched_text.len()),
430                },
431                semantic_impact: ComprehensiveSemanticImpact::minimal(),
432                affected_symbols: Vec::new(),
433                confidence: m.confidence,
434                description: format!(
435                    "Found pattern '{}' in {} at line {}",
436                    query.pattern,
437                    m.file.display(),
438                    m.line
439                ),
440            })
441            .collect();
442
443        let summary = format!(
444            "Found {} matches for '{}' across {} files in {:?}",
445            matches.len(),
446            query.pattern,
447            query.paths.len(),
448            duration
449        );
450
451        Ok(ComprehensiveToolOutput {
452            result: matches,
453            context,
454            changes,
455            metadata: OperationMetadata {
456                tool: "grep".to_string(),
457                operation: "search".to_string(),
458                operation_id: Uuid::new_v4(),
459                started_at: std::time::SystemTime::now() - duration,
460                completed_at: std::time::SystemTime::now(),
461                confidence: 1.0,
462                parameters: [
463                    ("pattern".to_string(), query.pattern.clone()),
464                    ("rule_type".to_string(), format!("{:?}", query.rule_type)),
465                ]
466                .iter()
467                .cloned()
468                .collect(),
469                initiated_by: Some("user".to_string()),
470                session_id: Some(Uuid::new_v4()),
471                tool_version: "1.0.0".to_string(),
472            },
473            summary,
474            performance: PerformanceMetrics {
475                execution_time: duration,
476                phase_times: std::collections::HashMap::new(),
477                memory_usage: super::MemoryUsage::default(),
478                cpu_usage: super::CpuUsage::default(),
479                io_stats: super::IoStats::default(),
480                cache_stats: super::CacheStats::default(),
481            },
482            diagnostics: Vec::new(),
483        })
484    }
485}