1use crate::context::{ContextItem, ContextProvider, ContextQuery, ContextResult, ContextType};
11use async_trait::async_trait;
12use ignore::WalkBuilder;
13use regex::Regex;
14use std::fs;
15use std::path::{Path, PathBuf};
16
/// Settings that control how [`RipgrepContextProvider`] walks and searches a
/// directory tree.
#[derive(Debug, Clone)]
pub struct RipgrepContextConfig {
    /// Directory handed to the file walker as the starting point of a search.
    pub root_path: PathBuf,
    /// Glob patterns selecting files to search. A file is searched only when
    /// it matches at least one pattern; an empty list accepts every file.
    pub include_patterns: Vec<String>,
    /// Glob patterns selecting files to skip. A file matching any of these is
    /// never searched; an empty list skips nothing.
    pub exclude_patterns: Vec<String>,
    /// Largest file, in bytes, that will be read. Bigger files are skipped.
    pub max_file_size: usize,
    /// When `true` the query is matched without regard to letter case.
    pub case_insensitive: bool,
    /// Number of lines captured before and after every matching line.
    pub context_lines: usize,
}
33
34impl RipgrepContextConfig {
35 pub fn new(root_path: impl Into<PathBuf>) -> Self {
37 Self {
38 root_path: root_path.into(),
39 include_patterns: vec![
40 "**/*.rs".to_string(),
41 "**/*.py".to_string(),
42 "**/*.ts".to_string(),
43 "**/*.tsx".to_string(),
44 "**/*.js".to_string(),
45 "**/*.jsx".to_string(),
46 "**/*.go".to_string(),
47 "**/*.java".to_string(),
48 "**/*.c".to_string(),
49 "**/*.cpp".to_string(),
50 "**/*.h".to_string(),
51 "**/*.hpp".to_string(),
52 "**/*.md".to_string(),
53 "**/*.toml".to_string(),
54 "**/*.yaml".to_string(),
55 "**/*.yml".to_string(),
56 "**/*.json".to_string(),
57 ],
58 exclude_patterns: vec![
59 "**/target/**".to_string(),
60 "**/node_modules/**".to_string(),
61 "**/.git/**".to_string(),
62 "**/dist/**".to_string(),
63 "**/build/**".to_string(),
64 "**/*.lock".to_string(),
65 "**/vendor/**".to_string(),
66 "**/__pycache__/**".to_string(),
67 ],
68 max_file_size: 1024 * 1024, case_insensitive: true,
70 context_lines: 2,
71 }
72 }
73
74 pub fn with_include_patterns(mut self, patterns: Vec<String>) -> Self {
76 self.include_patterns = patterns;
77 self
78 }
79
80 pub fn with_exclude_patterns(mut self, patterns: Vec<String>) -> Self {
82 self.exclude_patterns = patterns;
83 self
84 }
85
86 pub fn with_max_file_size(mut self, size: usize) -> Self {
88 self.max_file_size = size;
89 self
90 }
91
92 pub fn with_case_insensitive(mut self, enabled: bool) -> Self {
94 self.case_insensitive = enabled;
95 self
96 }
97
98 pub fn with_context_lines(mut self, lines: usize) -> Self {
100 self.context_lines = lines;
101 self
102 }
103}
104
105#[derive(Debug, Clone)]
107struct FileMatch {
108 path: PathBuf,
109 matches: Vec<MatchResult>,
110 relevance: f32,
111}
112
/// A single matching line plus the lines surrounding it.
#[derive(Debug, Clone)]
struct MatchResult {
    /// 1-based line number of the matching line.
    line_number: usize,
    /// Text of the matching line.
    line_content: String,
    /// Lines immediately preceding the match, oldest first.
    context_before: Vec<String>,
    /// Lines immediately following the match.
    context_after: Vec<String>,
}
121
122pub struct RipgrepContextProvider {
124 config: RipgrepContextConfig,
125}
126
127impl RipgrepContextProvider {
128 pub fn new(config: RipgrepContextConfig) -> Self {
130 Self { config }
131 }
132
133 async fn search_files(
135 &self,
136 query: &str,
137 max_results: usize,
138 ) -> anyhow::Result<Vec<FileMatch>> {
139 let root = self.config.root_path.clone();
140 let max_file_size = self.config.max_file_size;
141 let include = self.config.include_patterns.clone();
142 let exclude = self.config.exclude_patterns.clone();
143 let case_insensitive = self.config.case_insensitive;
144 let context_lines = self.config.context_lines;
145 let query = query.to_string();
146
147 tokio::task::spawn_blocking(move || {
149 let pattern = if case_insensitive {
151 format!("(?i){}", regex::escape(&query))
152 } else {
153 regex::escape(&query)
154 };
155
156 let regex = Regex::new(&pattern)?;
157
158 let mut file_matches = Vec::new();
159
160 let walker = WalkBuilder::new(&root)
161 .hidden(false)
162 .git_ignore(true)
163 .build();
164
165 for entry in walker {
166 let entry = entry.map_err(|e| anyhow::anyhow!("Walk error: {}", e))?;
167 let path = entry.path();
168
169 if !path.is_file() {
170 continue;
171 }
172
173 let metadata = fs::metadata(path)
174 .map_err(|e| anyhow::anyhow!("Metadata error for {}: {}", path.display(), e))?;
175
176 if metadata.len() > max_file_size as u64 {
177 continue;
178 }
179
180 if !matches_patterns(path, &include, true) {
181 continue;
182 }
183
184 if matches_patterns(path, &exclude, false) {
185 continue;
186 }
187
188 let content = match fs::read_to_string(path) {
189 Ok(c) => c,
190 Err(_) => continue, };
192
193 if content.trim().is_empty() {
194 continue;
195 }
196
197 let lines: Vec<&str> = content.lines().collect();
199 let mut matches = Vec::new();
200
201 for (line_idx, line) in lines.iter().enumerate() {
202 if regex.is_match(line) {
203 let context_before = if line_idx >= context_lines {
204 lines[line_idx - context_lines..line_idx]
205 .iter()
206 .map(|s| s.to_string())
207 .collect()
208 } else {
209 lines[0..line_idx].iter().map(|s| s.to_string()).collect()
210 };
211
212 let context_after = if line_idx + context_lines < lines.len() {
213 lines[line_idx + 1..=line_idx + context_lines]
214 .iter()
215 .map(|s| s.to_string())
216 .collect()
217 } else {
218 lines[line_idx + 1..]
219 .iter()
220 .map(|s| s.to_string())
221 .collect()
222 };
223
224 matches.push(MatchResult {
225 line_number: line_idx + 1,
226 line_content: line.to_string(),
227 context_before,
228 context_after,
229 });
230 }
231 }
232
233 if !matches.is_empty() {
234 let relevance = (matches.len() as f32) / (lines.len() as f32).sqrt();
236
237 file_matches.push(FileMatch {
238 path: path.to_path_buf(),
239 matches,
240 relevance,
241 });
242 }
243 }
244
245 file_matches.sort_by(|a, b| {
247 b.relevance
248 .partial_cmp(&a.relevance)
249 .unwrap_or(std::cmp::Ordering::Equal)
250 });
251 file_matches.truncate(max_results);
252
253 Ok::<_, anyhow::Error>(file_matches)
254 })
255 .await
256 .map_err(|e| anyhow::anyhow!("Spawn blocking failed: {}", e))?
257 }
258
259 fn format_match(&self, file_match: &FileMatch, depth: &crate::context::ContextDepth) -> String {
261 let mut output = String::new();
262 let path_str = file_match.path.display().to_string();
263
264 match depth {
265 crate::context::ContextDepth::Abstract => {
266 output.push_str(&format!(
268 "{}: {} matches\n",
269 path_str,
270 file_match.matches.len()
271 ));
272 }
273 crate::context::ContextDepth::Overview => {
274 output.push_str(&format!("{}:\n", path_str));
276 for (idx, m) in file_match.matches.iter().take(3).enumerate() {
277 if idx > 0 {
278 output.push('\n');
279 }
280 output.push_str(&format!(" Line {}:\n", m.line_number));
281 output.push_str(&format!(" {}\n", m.line_content));
282 }
283 if file_match.matches.len() > 3 {
284 output.push_str(&format!(
285 " ... and {} more matches\n",
286 file_match.matches.len() - 3
287 ));
288 }
289 }
290 crate::context::ContextDepth::Full => {
291 output.push_str(&format!("{}:\n", path_str));
293 for (idx, m) in file_match.matches.iter().enumerate() {
294 if idx > 0 {
295 output.push('\n');
296 }
297 output.push_str(&format!(" Line {}:\n", m.line_number));
298 for ctx in &m.context_before {
299 output.push_str(&format!(" {}\n", ctx));
300 }
301 output.push_str(&format!(" > {}\n", m.line_content));
302 for ctx in &m.context_after {
303 output.push_str(&format!(" {}\n", ctx));
304 }
305 }
306 }
307 }
308
309 output
310 }
311}
312
313#[async_trait]
314impl ContextProvider for RipgrepContextProvider {
315 fn name(&self) -> &str {
316 "ripgrep"
317 }
318
319 async fn query(&self, query: &ContextQuery) -> anyhow::Result<ContextResult> {
320 let file_matches = self.search_files(&query.query, query.max_results).await?;
321
322 let mut result = ContextResult::new("ripgrep");
323 let mut total_tokens = 0usize;
324
325 for file_match in file_matches {
326 if total_tokens >= query.max_tokens {
327 result.truncated = true;
328 break;
329 }
330
331 let content = self.format_match(&file_match, &query.depth);
332 let token_count = content.split_whitespace().count();
333
334 if total_tokens + token_count > query.max_tokens {
335 result.truncated = true;
336 break;
337 }
338
339 total_tokens += token_count;
340
341 result.add_item(
342 ContextItem::new(
343 file_match.path.to_string_lossy().to_string(),
344 ContextType::Resource,
345 content,
346 )
347 .with_token_count(token_count)
348 .with_relevance(file_match.relevance)
349 .with_source(format!("file:{}", file_match.path.display()))
350 .with_metadata("match_count", serde_json::json!(file_match.matches.len())),
351 );
352 }
353
354 Ok(result)
355 }
356}
357
358fn matches_patterns(path: &Path, patterns: &[String], default_if_empty: bool) -> bool {
364 if patterns.is_empty() {
365 return default_if_empty;
366 }
367 let path_str = path.to_string_lossy().replace('\\', "/");
368 patterns.iter().any(|pattern| {
369 glob::Pattern::new(pattern)
370 .map(|p| p.matches(&path_str))
371 .unwrap_or(false)
372 })
373}
374
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates a temporary workspace holding `main.rs`, `lib.rs`, `README.md`
    /// and `src/auth.rs`. The directory is removed when the returned guard is
    /// dropped.
    fn setup_test_workspace() -> TempDir {
        let workspace = TempDir::new().unwrap();
        let base = workspace.path();

        let mut main_rs = File::create(base.join("main.rs")).unwrap();
        writeln!(main_rs, "fn main() {{\n println!(\"Hello, world!\");\n}}").unwrap();

        let mut lib_rs = File::create(base.join("lib.rs")).unwrap();
        writeln!(
            lib_rs,
            "pub mod auth;\npub mod database;\n\npub fn init() -> Result<()> {{\n Ok(())\n}}"
        )
        .unwrap();

        let mut readme = File::create(base.join("README.md")).unwrap();
        writeln!(
            readme,
            "# My Project\n\nA Rust project for testing ripgrep context."
        )
        .unwrap();

        std::fs::create_dir(base.join("src")).unwrap();
        let mut auth_rs = File::create(base.join("src/auth.rs")).unwrap();
        writeln!(
            auth_rs,
            "use jwt::Token;\n\npub fn verify_token(token: &str) -> Result<Claims> {{\n // JWT verification logic\n todo!()\n}}"
        )
        .unwrap();

        workspace
    }

    /// `new` fills in the documented defaults.
    #[test]
    fn test_config_defaults() {
        let defaults = RipgrepContextConfig::new("/tmp/test");

        assert_eq!(defaults.root_path, PathBuf::from("/tmp/test"));
        assert!(!defaults.include_patterns.is_empty());
        assert!(!defaults.exclude_patterns.is_empty());
        assert_eq!(defaults.max_file_size, 1024 * 1024);
        assert!(defaults.case_insensitive);
        assert_eq!(defaults.context_lines, 2);
    }

    /// Every builder method overrides exactly its own field.
    #[test]
    fn test_config_builders() {
        let customised = RipgrepContextConfig::new("/tmp")
            .with_include_patterns(vec!["**/*.rs".to_string()])
            .with_exclude_patterns(vec!["**/test/**".to_string()])
            .with_max_file_size(2048)
            .with_case_insensitive(false)
            .with_context_lines(5);

        assert_eq!(customised.include_patterns, vec!["**/*.rs"]);
        assert_eq!(customised.exclude_patterns, vec!["**/test/**"]);
        assert_eq!(customised.max_file_size, 2048);
        assert!(!customised.case_insensitive);
        assert_eq!(customised.context_lines, 5);
    }

    /// A query finds the file containing the text and returns its content.
    #[tokio::test]
    async fn test_provider_search() {
        let workspace = setup_test_workspace();
        let provider = RipgrepContextProvider::new(RipgrepContextConfig::new(workspace.path()));

        let request = ContextQuery::new("Rust");
        let result = provider.query(&request).await.unwrap();

        assert_eq!(result.provider, "ripgrep");
        assert!(!result.items.is_empty());
        let found = result
            .items
            .iter()
            .any(|item| item.content.contains("Rust"));
        assert!(found);
    }

    /// With case-insensitive matching an upper-case query still finds text.
    #[tokio::test]
    async fn test_provider_case_insensitive() {
        let workspace = setup_test_workspace();
        let settings = RipgrepContextConfig::new(workspace.path()).with_case_insensitive(true);
        let provider = RipgrepContextProvider::new(settings);

        let request = ContextQuery::new("RUST");
        let result = provider.query(&request).await.unwrap();

        assert!(!result.items.is_empty());
    }

    /// The number of returned items never exceeds `max_results`.
    #[tokio::test]
    async fn test_provider_max_results() {
        let workspace = setup_test_workspace();
        let provider = RipgrepContextProvider::new(RipgrepContextConfig::new(workspace.path()));

        let request = ContextQuery::new("fn").with_max_results(1);
        let result = provider.query(&request).await.unwrap();

        assert!(result.items.len() <= 1);
    }

    /// The provider identifies itself as "ripgrep".
    #[tokio::test]
    async fn test_provider_name() {
        let workspace = TempDir::new().unwrap();
        let provider = RipgrepContextProvider::new(RipgrepContextConfig::new(workspace.path()));

        assert_eq!(provider.name(), "ripgrep");
    }

    /// An empty pattern list yields the default when the default is `true`.
    #[test]
    fn test_matches_patterns_empty_default_true() {
        assert!(matches_patterns(Path::new("test.rs"), &[], true));
    }

    /// An empty pattern list yields the default when the default is `false`.
    #[test]
    fn test_matches_patterns_empty_default_false() {
        assert!(!matches_patterns(Path::new("test.rs"), &[], false));
    }

    /// A glob accepts matching paths and rejects the others.
    #[test]
    fn test_matches_patterns_include() {
        let rust_only = vec!["**/*.rs".to_string()];

        assert!(matches_patterns(Path::new("src/main.rs"), &rust_only, false));
        assert!(!matches_patterns(
            Path::new("src/main.py"),
            &rust_only,
            false
        ));
    }
}