1use super::Change;
7use super::ChangeKind;
8use super::ComprehensiveSemanticImpact;
9use super::ComprehensiveToolOutput;
10use super::ContextSnapshot;
11use super::OperationContext;
12use super::OperationMetadata;
13use super::OperationScope;
14use super::PerformanceMetrics;
15use super::ScopeType;
16use agcodex_ast::SourceLocation;
18use dashmap::DashMap;
19use serde::Deserialize;
20use serde::Serialize;
21use std::path::Path;
22use std::path::PathBuf;
23use std::sync::Arc;
24use std::time::Duration;
25use std::time::Instant;
26use thiserror::Error;
27use tracing::error;
28use uuid::Uuid;
29
30#[derive(Debug, Error)]
32pub enum GrepError {
33 #[error("invalid pattern: {pattern} - {reason}")]
34 InvalidPattern { pattern: String, reason: String },
35
36 #[error("unsupported language: {language}")]
37 UnsupportedLanguage { language: String },
38
39 #[error("query compilation failed: {query} - {reason}")]
40 QueryCompilationFailed { query: String, reason: String },
41
42 #[error("YAML rule parsing failed: {rule} - {reason}")]
43 YamlRuleFailed { rule: String, reason: String },
44
45 #[error("search timeout after {duration:?}")]
46 SearchTimeout { duration: Duration },
47
48 #[error("file access error: {path} - {reason}")]
49 FileAccess { path: PathBuf, reason: String },
50
51 #[error("parse error for {path}: {reason}")]
52 ParseError { path: PathBuf, reason: String },
53
54 #[error("pattern cache overflow: {current_size} >= {max_size}")]
55 CacheOverflow {
56 current_size: usize,
57 max_size: usize,
58 },
59
60 #[error("performance threshold exceeded: {actual_ms}ms > {threshold_ms}ms")]
61 PerformanceThreshold { actual_ms: u64, threshold_ms: u64 },
62
63 #[error(transparent)]
64 Io(#[from] std::io::Error),
65}
66
67pub type GrepResult<T> = std::result::Result<T, GrepError>;
69
70#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
72pub enum RuleType {
73 Pattern,
75 Query,
77 YamlRule,
79}
80
81#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
83pub enum SupportedLanguage {
84 Rust,
85 Python,
86 JavaScript,
87 TypeScript,
88 Go,
89 Java,
90 C,
91 Cpp,
92 CSharp,
93 Html,
94 Css,
95 Json,
96 Yaml,
97 Toml,
98 Bash,
99 Ruby,
100 Php,
101 Haskell,
102 Elixir,
103 Swift,
104 Kotlin,
105 Sql,
106 Dockerfile,
107 Markdown,
108}
109
110impl SupportedLanguage {
111 pub const fn as_str(&self) -> &str {
112 match self {
113 Self::Rust => "rust",
114 Self::Python => "python",
115 Self::JavaScript => "javascript",
116 Self::TypeScript => "typescript",
117 Self::Go => "go",
118 Self::Java => "java",
119 Self::C => "c",
120 Self::Cpp => "cpp",
121 Self::CSharp => "csharp",
122 Self::Html => "html",
123 Self::Css => "css",
124 Self::Json => "json",
125 Self::Yaml => "yaml",
126 Self::Toml => "toml",
127 Self::Bash => "bash",
128 Self::Ruby => "ruby",
129 Self::Php => "php",
130 Self::Haskell => "haskell",
131 Self::Elixir => "elixir",
132 Self::Swift => "swift",
133 Self::Kotlin => "kotlin",
134 Self::Sql => "sql",
135 Self::Dockerfile => "dockerfile",
136 Self::Markdown => "markdown",
137 }
138 }
139}
140
141#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct GrepMatch {
144 pub file: PathBuf,
145 pub line: usize,
146 pub column: usize,
147 pub end_line: usize,
148 pub end_column: usize,
149 pub matched_text: String,
150 pub context_before: Vec<String>,
151 pub context_after: Vec<String>,
152 pub confidence: f32,
153 pub byte_offset: usize,
154}
155
156#[derive(Debug, Clone)]
158pub struct GrepQuery {
159 pub pattern: String,
160 pub paths: Vec<PathBuf>,
161 pub language: Option<SupportedLanguage>,
162 pub rule_type: RuleType,
163 pub max_results: Option<usize>,
164 pub include_hidden: bool,
165 pub follow_symlinks: bool,
166 pub case_sensitive: bool,
167 pub whole_word: bool,
168 pub context_lines: usize,
169}
170
171impl Default for GrepQuery {
172 fn default() -> Self {
173 Self {
174 pattern: String::new(),
175 paths: Vec::new(),
176 language: None,
177 rule_type: RuleType::Pattern,
178 max_results: Some(1000),
179 include_hidden: false,
180 follow_symlinks: false,
181 case_sensitive: true,
182 whole_word: false,
183 context_lines: 3,
184 }
185 }
186}
187
/// Tunables for the grep engine.
///
/// NOTE(review): `SimpleGrepEngine` in this file stores the configuration
/// but does not read any of these values; units below are inferred from the
/// defaults and field names — confirm against the consumer.
#[derive(Debug, Clone)]
pub struct GrepConfig {
    /// Largest file size to consider (default `10 * 1024 * 1024`,
    /// presumably bytes).
    pub max_file_size: usize,
    /// Capacity limit for the pattern cache (default `1000`).
    pub max_pattern_cache_size: usize,
    /// Threshold related to parallel execution (default `10`).
    pub parallel_threshold: usize,
    /// Overall search timeout (default 30 seconds).
    pub timeout: Duration,
    /// Performance threshold in milliseconds (default `5000`).
    pub performance_threshold_ms: u64,
}

impl Default for GrepConfig {
    /// Returns the stock configuration: 10 MiB file-size limit, 1000 cache
    /// entries, parallel threshold of 10, 30 s timeout and a 5000 ms
    /// performance threshold.
    fn default() -> Self {
        const TEN_MIB: usize = 10 * 1024 * 1024;
        const CACHE_ENTRIES: usize = 1000;
        const PARALLEL_THRESHOLD: usize = 10;
        const TIMEOUT_SECS: u64 = 30;
        const PERF_THRESHOLD_MS: u64 = 5000;

        let timeout = Duration::from_secs(TIMEOUT_SECS);

        Self {
            max_file_size: TEN_MIB,
            max_pattern_cache_size: CACHE_ENTRIES,
            parallel_threshold: PARALLEL_THRESHOLD,
            timeout,
            performance_threshold_ms: PERF_THRESHOLD_MS,
        }
    }
}
209
210pub struct SimpleGrepEngine {
212 _config: GrepConfig,
213 _pattern_cache: Arc<DashMap<String, Vec<GrepMatch>>>,
214}
215
216impl SimpleGrepEngine {
217 pub fn new(config: GrepConfig) -> Self {
218 Self {
219 _config: config,
220 _pattern_cache: Arc::new(DashMap::new()),
221 }
222 }
223
224 pub fn search_files(&self, query: &GrepQuery) -> GrepResult<Vec<GrepMatch>> {
226 let mut all_matches = Vec::new();
227
228 for path in &query.paths {
229 if path.is_file() {
230 let matches = self.search_file(path, &query.pattern, query)?;
231 all_matches.extend(matches);
232 } else if path.is_dir() {
233 let matches = self.search_directory(path, &query.pattern, query)?;
234 all_matches.extend(matches);
235 }
236
237 if let Some(max) = query.max_results
239 && all_matches.len() >= max
240 {
241 all_matches.truncate(max);
242 break;
243 }
244 }
245
246 Ok(all_matches)
247 }
248
249 fn search_file(
251 &self,
252 path: &Path,
253 pattern: &str,
254 query: &GrepQuery,
255 ) -> GrepResult<Vec<GrepMatch>> {
256 let content = std::fs::read_to_string(path)?;
257 let lines: Vec<&str> = content.lines().collect();
258 let mut matches = Vec::new();
259
260 for (line_idx, line) in lines.iter().enumerate() {
261 if self.line_matches(line, pattern, query) {
262 let line_num = line_idx + 1;
263
264 let context_before = if line_idx > 0 {
266 let start = line_idx.saturating_sub(query.context_lines);
267 lines[start..line_idx]
268 .iter()
269 .map(|s| (*s).to_string())
270 .collect()
271 } else {
272 Vec::new()
273 };
274
275 let context_after = if line_idx < lines.len() - 1 {
276 let end = std::cmp::min(line_idx + 1 + query.context_lines, lines.len());
277 lines[line_idx + 1..end]
278 .iter()
279 .map(|s| (*s).to_string())
280 .collect()
281 } else {
282 Vec::new()
283 };
284
285 matches.push(GrepMatch {
286 file: path.to_path_buf(),
287 line: line_num,
288 column: 1,
289 end_line: line_num,
290 end_column: line.len(),
291 matched_text: (*line).to_string(),
292 context_before,
293 context_after,
294 confidence: 1.0,
295 byte_offset: 0, });
297 }
298 }
299
300 Ok(matches)
301 }
302
303 fn search_directory(
305 &self,
306 dir: &Path,
307 pattern: &str,
308 query: &GrepQuery,
309 ) -> GrepResult<Vec<GrepMatch>> {
310 let mut all_matches = Vec::new();
311
312 for entry in std::fs::read_dir(dir)? {
313 let entry = entry?;
314 let path = entry.path();
315
316 if path.is_file() {
317 if let Ok(matches) = self.search_file(&path, pattern, query) {
318 all_matches.extend(matches);
319 }
320 } else if path.is_dir()
321 && !path
322 .file_name()
323 .unwrap_or_default()
324 .to_string_lossy()
325 .starts_with('.')
326 && let Ok(matches) = self.search_directory(&path, pattern, query)
327 {
328 all_matches.extend(matches);
329 }
330 }
331
332 Ok(all_matches)
333 }
334
335 fn line_matches(&self, line: &str, pattern: &str, query: &GrepQuery) -> bool {
337 if query.case_sensitive {
338 line.contains(pattern)
339 } else {
340 line.to_lowercase().contains(&pattern.to_lowercase())
341 }
342 }
343}
344
345pub struct GrepTool {
347 engine: SimpleGrepEngine,
348}
349
350impl GrepTool {
351 pub fn new(config: GrepConfig) -> Self {
352 Self {
353 engine: SimpleGrepEngine::new(config),
354 }
355 }
356
357 pub fn search_with_query(
359 &self,
360 query: GrepQuery,
361 ) -> GrepResult<ComprehensiveToolOutput<Vec<GrepMatch>>> {
362 let start = Instant::now();
363
364 let matches = self.engine.search_files(&query)?;
366
367 let duration = start.elapsed();
368
369 let first_path = query
371 .paths
372 .first()
373 .cloned()
374 .unwrap_or_else(|| PathBuf::from("unknown"));
375 let context = OperationContext {
376 before: ContextSnapshot {
377 content: format!("Searching for pattern: {}", query.pattern),
378 timestamp: std::time::SystemTime::now(),
379 content_hash: format!("{:x}", md5::compute(&query.pattern)),
380 ast_summary: None,
381 symbols: Vec::new(),
382 },
383 after: None,
384 surrounding: Vec::new(),
385 location: SourceLocation {
386 file_path: first_path.to_string_lossy().to_string(),
387 start_line: 0,
388 start_column: 0,
389 end_line: 0,
390 end_column: 0,
391 byte_range: (0, 0),
392 },
393 scope: OperationScope {
394 scope_type: ScopeType::File,
395 name: "search".to_string(),
396 path: vec!["grep".to_string()],
397 file_path: first_path.clone(),
398 line_range: 0..0,
399 },
400 language_context: None,
401 project_context: None,
402 };
403
404 let changes = matches
405 .iter()
406 .map(|m| Change {
407 id: Uuid::new_v4(),
408 kind: ChangeKind::Added {
409 reason: format!("Pattern match found for '{}'", query.pattern),
410 insertion_point: SourceLocation {
411 file_path: m.file.to_string_lossy().to_string(),
412 start_line: m.line,
413 start_column: m.column,
414 end_line: m.line,
415 end_column: m.column + m.matched_text.len(),
416 byte_range: (m.byte_offset, m.byte_offset + m.matched_text.len()),
417 },
418 },
419 old: None,
420 new: Some(m.matched_text.clone()),
421 line_range: m.line..m.line + 1,
422 char_range: m.column..m.column + m.matched_text.len(),
423 location: SourceLocation {
424 file_path: m.file.to_string_lossy().to_string(),
425 start_line: m.line,
426 start_column: m.column,
427 end_line: m.line,
428 end_column: m.column + m.matched_text.len(),
429 byte_range: (m.byte_offset, m.byte_offset + m.matched_text.len()),
430 },
431 semantic_impact: ComprehensiveSemanticImpact::minimal(),
432 affected_symbols: Vec::new(),
433 confidence: m.confidence,
434 description: format!(
435 "Found pattern '{}' in {} at line {}",
436 query.pattern,
437 m.file.display(),
438 m.line
439 ),
440 })
441 .collect();
442
443 let summary = format!(
444 "Found {} matches for '{}' across {} files in {:?}",
445 matches.len(),
446 query.pattern,
447 query.paths.len(),
448 duration
449 );
450
451 Ok(ComprehensiveToolOutput {
452 result: matches,
453 context,
454 changes,
455 metadata: OperationMetadata {
456 tool: "grep".to_string(),
457 operation: "search".to_string(),
458 operation_id: Uuid::new_v4(),
459 started_at: std::time::SystemTime::now() - duration,
460 completed_at: std::time::SystemTime::now(),
461 confidence: 1.0,
462 parameters: [
463 ("pattern".to_string(), query.pattern.clone()),
464 ("rule_type".to_string(), format!("{:?}", query.rule_type)),
465 ]
466 .iter()
467 .cloned()
468 .collect(),
469 initiated_by: Some("user".to_string()),
470 session_id: Some(Uuid::new_v4()),
471 tool_version: "1.0.0".to_string(),
472 },
473 summary,
474 performance: PerformanceMetrics {
475 execution_time: duration,
476 phase_times: std::collections::HashMap::new(),
477 memory_usage: super::MemoryUsage::default(),
478 cpu_usage: super::CpuUsage::default(),
479 io_stats: super::IoStats::default(),
480 cache_stats: super::CacheStats::default(),
481 },
482 diagnostics: Vec::new(),
483 })
484 }
485}