1use crate::context::{ContextItem, ContextProvider, ContextQuery, ContextResult, ContextType};
11use async_trait::async_trait;
12use ignore::WalkBuilder;
13use regex::Regex;
14use std::fs;
15use std::path::{Path, PathBuf};
16
/// Settings that control which files [`RipgrepContextProvider`] searches and how
/// much surrounding text it captures for each hit.
#[derive(Debug, Clone)]
pub struct RipgrepContextConfig {
    /// Directory the file walk starts from.
    pub root_path: PathBuf,
    /// Glob patterns a file has to match to be searched; an empty list admits every file.
    pub include_patterns: Vec<String>,
    /// Glob patterns that remove a file from the search; an empty list removes nothing.
    pub exclude_patterns: Vec<String>,
    /// Files larger than this many bytes are skipped.
    pub max_file_size: usize,
    /// When `true`, the query is matched without regard to letter case.
    pub case_insensitive: bool,
    /// Number of lines captured before and after every matching line.
    pub context_lines: usize,
}

impl RipgrepContextConfig {
    /// Creates a configuration rooted at `root_path` using the built-in defaults:
    /// common source/markup extensions are included, typical build and dependency
    /// directories are excluded, files are capped at 1 MiB, matching ignores case
    /// and two lines of context are kept on each side of a hit.
    pub fn new(root_path: impl Into<PathBuf>) -> Self {
        const DEFAULT_INCLUDES: &[&str] = &[
            "**/*.rs",
            "**/*.py",
            "**/*.ts",
            "**/*.tsx",
            "**/*.js",
            "**/*.jsx",
            "**/*.go",
            "**/*.java",
            "**/*.c",
            "**/*.cpp",
            "**/*.h",
            "**/*.hpp",
            "**/*.md",
            "**/*.toml",
            "**/*.yaml",
            "**/*.yml",
            "**/*.json",
        ];
        const DEFAULT_EXCLUDES: &[&str] = &[
            "**/target/**",
            "**/node_modules/**",
            "**/.git/**",
            "**/dist/**",
            "**/build/**",
            "**/*.lock",
            "**/vendor/**",
            "**/__pycache__/**",
        ];

        let owned = |globs: &[&str]| -> Vec<String> {
            globs.iter().map(|glob| glob.to_string()).collect()
        };

        Self {
            root_path: root_path.into(),
            include_patterns: owned(DEFAULT_INCLUDES),
            exclude_patterns: owned(DEFAULT_EXCLUDES),
            max_file_size: 1024 * 1024,
            case_insensitive: true,
            context_lines: 2,
        }
    }

    /// Replaces the include globs with `patterns`.
    pub fn with_include_patterns(self, patterns: Vec<String>) -> Self {
        Self {
            include_patterns: patterns,
            ..self
        }
    }

    /// Replaces the exclude globs with `patterns`.
    pub fn with_exclude_patterns(self, patterns: Vec<String>) -> Self {
        Self {
            exclude_patterns: patterns,
            ..self
        }
    }

    /// Sets the largest file size, in bytes, that will still be searched.
    pub fn with_max_file_size(self, size: usize) -> Self {
        Self {
            max_file_size: size,
            ..self
        }
    }

    /// Turns case-insensitive matching on or off.
    pub fn with_case_insensitive(self, enabled: bool) -> Self {
        Self {
            case_insensitive: enabled,
            ..self
        }
    }

    /// Sets how many lines of context are kept on each side of a match.
    pub fn with_context_lines(self, lines: usize) -> Self {
        Self {
            context_lines: lines,
            ..self
        }
    }
}
104
/// Every match found in one file, together with that file's ranking score.
#[derive(Debug, Clone)]
struct FileMatch {
    /// Path of the file as yielded by the directory walk.
    path: PathBuf,
    /// One entry per line of the file that matched the query.
    matches: Vec<MatchResult>,
    /// Ranking score. `search_files` sets it to the number of matching lines
    /// divided by the square root of the file's line count and orders its
    /// results by this value, highest first.
    relevance: f32,
}
112
/// A single matching line plus the lines captured around it.
#[derive(Debug, Clone)]
struct MatchResult {
    /// 1-based number of the matching line within its file.
    line_number: usize,
    /// Full text of the matching line.
    line_content: String,
    /// Lines immediately preceding the match, in file order
    /// (at most the configured `context_lines`).
    context_before: Vec<String>,
    /// Lines immediately following the match, in file order
    /// (at most the configured `context_lines`).
    context_after: Vec<String>,
}
121
122pub struct RipgrepContextProvider {
124 config: RipgrepContextConfig,
125}
126
127impl RipgrepContextProvider {
128 pub fn new(config: RipgrepContextConfig) -> Self {
130 Self { config }
131 }
132
133 async fn search_files(
135 &self,
136 query: &str,
137 max_results: usize,
138 ) -> anyhow::Result<Vec<FileMatch>> {
139 let root = self.config.root_path.clone();
140 let max_file_size = self.config.max_file_size;
141 let include = self.config.include_patterns.clone();
142 let exclude = self.config.exclude_patterns.clone();
143 let case_insensitive = self.config.case_insensitive;
144 let context_lines = self.config.context_lines;
145 let query = query.to_string();
146
147 tokio::task::spawn_blocking(move || {
149 let pattern = if case_insensitive {
151 format!("(?i){}", regex::escape(&query))
152 } else {
153 regex::escape(&query)
154 };
155
156 let regex = Regex::new(&pattern)?;
157
158 let mut file_matches = Vec::new();
159
160 let walker = WalkBuilder::new(&root)
161 .hidden(false)
162 .git_ignore(true)
163 .build();
164
165 for entry in walker {
166 let entry = entry.map_err(|e| anyhow::anyhow!("Walk error: {}", e))?;
167 let path = entry.path();
168
169 if !path.is_file() {
170 continue;
171 }
172
173 let metadata = fs::metadata(path)
174 .map_err(|e| anyhow::anyhow!("Metadata error for {}: {}", path.display(), e))?;
175
176 if metadata.len() > max_file_size as u64 {
177 continue;
178 }
179
180 if !matches_patterns(path, &include, true) {
181 continue;
182 }
183
184 if matches_patterns(path, &exclude, false) {
185 continue;
186 }
187
188 let content = match fs::read_to_string(path) {
189 Ok(c) => c,
190 Err(_) => continue, };
192
193 if content.trim().is_empty() {
194 continue;
195 }
196
197 let lines: Vec<&str> = content.lines().collect();
199 let mut matches = Vec::new();
200
201 for (line_idx, line) in lines.iter().enumerate() {
202 if regex.is_match(line) {
203 let context_before = if line_idx >= context_lines {
204 lines[line_idx - context_lines..line_idx]
205 .iter()
206 .map(|s| s.to_string())
207 .collect()
208 } else {
209 lines[0..line_idx].iter().map(|s| s.to_string()).collect()
210 };
211
212 let context_after = if line_idx + context_lines < lines.len() {
213 lines[line_idx + 1..=line_idx + context_lines]
214 .iter()
215 .map(|s| s.to_string())
216 .collect()
217 } else {
218 lines[line_idx + 1..]
219 .iter()
220 .map(|s| s.to_string())
221 .collect()
222 };
223
224 matches.push(MatchResult {
225 line_number: line_idx + 1,
226 line_content: line.to_string(),
227 context_before,
228 context_after,
229 });
230 }
231 }
232
233 if !matches.is_empty() {
234 let relevance = (matches.len() as f32) / (lines.len() as f32).sqrt();
236
237 file_matches.push(FileMatch {
238 path: path.to_path_buf(),
239 matches,
240 relevance,
241 });
242 }
243 }
244
245 file_matches.sort_by(|a, b| {
247 b.relevance
248 .partial_cmp(&a.relevance)
249 .unwrap_or(std::cmp::Ordering::Equal)
250 });
251 file_matches.truncate(max_results);
252
253 Ok::<_, anyhow::Error>(file_matches)
254 })
255 .await
256 .map_err(|e| anyhow::anyhow!("Spawn blocking failed: {}", e))?
257 }
258
259 fn format_match(&self, file_match: &FileMatch, depth: &crate::context::ContextDepth) -> String {
261 let mut output = String::new();
262 let path_str = file_match.path.display().to_string();
263
264 match depth {
265 crate::context::ContextDepth::Abstract => {
266 output.push_str(&format!(
268 "{}: {} matches\n",
269 path_str,
270 file_match.matches.len()
271 ));
272 }
273 crate::context::ContextDepth::Overview => {
274 output.push_str(&format!("{}:\n", path_str));
276 for (idx, m) in file_match.matches.iter().take(3).enumerate() {
277 if idx > 0 {
278 output.push('\n');
279 }
280 output.push_str(&format!(" Line {}:\n", m.line_number));
281 output.push_str(&format!(" {}\n", m.line_content));
282 }
283 if file_match.matches.len() > 3 {
284 output.push_str(&format!(
285 " ... and {} more matches\n",
286 file_match.matches.len() - 3
287 ));
288 }
289 }
290 crate::context::ContextDepth::Full => {
291 output.push_str(&format!("{}:\n", path_str));
293 for (idx, m) in file_match.matches.iter().enumerate() {
294 if idx > 0 {
295 output.push('\n');
296 }
297 output.push_str(&format!(" Line {}:\n", m.line_number));
298 for ctx in &m.context_before {
299 output.push_str(&format!(" {}\n", ctx));
300 }
301 output.push_str(&format!(" > {}\n", m.line_content));
302 for ctx in &m.context_after {
303 output.push_str(&format!(" {}\n", ctx));
304 }
305 }
306 }
307 }
308
309 output
310 }
311}
312
313#[async_trait]
314impl ContextProvider for RipgrepContextProvider {
315 fn name(&self) -> &str {
316 "ripgrep"
317 }
318
319 async fn query(&self, query: &ContextQuery) -> anyhow::Result<ContextResult> {
320 let file_matches = self.search_files(&query.query, query.max_results).await?;
321
322 let mut result = ContextResult::new("ripgrep");
323 let mut total_tokens = 0usize;
324
325 for file_match in file_matches {
326 if total_tokens >= query.max_tokens {
327 result.truncated = true;
328 break;
329 }
330
331 let content = self.format_match(&file_match, &query.depth);
332 let token_count = content.split_whitespace().count();
333
334 if total_tokens + token_count > query.max_tokens {
335 result.truncated = true;
336 break;
337 }
338
339 total_tokens += token_count;
340
341 result.add_item(
342 ContextItem::new(
343 file_match.path.to_string_lossy().to_string(),
344 ContextType::Resource,
345 content,
346 )
347 .with_token_count(token_count)
348 .with_relevance(file_match.relevance)
349 .with_source(format!("file:{}", file_match.path.display()))
350 .with_provenance("ripgrep")
351 .with_priority(0.6)
352 .with_trust(0.8)
353 .with_freshness(0.75)
354 .with_metadata("match_count", serde_json::json!(file_match.matches.len())),
355 );
356 }
357
358 Ok(result)
359 }
360}
361
362fn matches_patterns(path: &Path, patterns: &[String], default_if_empty: bool) -> bool {
368 if patterns.is_empty() {
369 return default_if_empty;
370 }
371 let path_str = path.to_string_lossy().replace('\\', "/");
372 patterns.iter().any(|pattern| {
373 glob::Pattern::new(pattern)
374 .map(|p| p.matches(&path_str))
375 .unwrap_or(false)
376 })
377}
378
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates a temporary workspace holding `main.rs`, `lib.rs`, `README.md`
    /// and `src/auth.rs`; the directory is removed when the returned guard drops.
    fn setup_test_workspace() -> TempDir {
        let workspace = TempDir::new().unwrap();
        let base = workspace.path();

        {
            let mut main_rs = File::create(base.join("main.rs")).unwrap();
            writeln!(main_rs, "fn main() {{\n println!(\"Hello, world!\");\n}}").unwrap();
        }

        {
            let mut lib_rs = File::create(base.join("lib.rs")).unwrap();
            writeln!(
                lib_rs,
                "pub mod auth;\npub mod database;\n\npub fn init() -> Result<()> {{\n Ok(())\n}}"
            )
            .unwrap();
        }

        {
            let mut readme = File::create(base.join("README.md")).unwrap();
            writeln!(
                readme,
                "# My Project\n\nA Rust project for testing ripgrep context."
            )
            .unwrap();
        }

        std::fs::create_dir(base.join("src")).unwrap();
        {
            let mut auth_rs = File::create(base.join("src/auth.rs")).unwrap();
            writeln!(
                auth_rs,
                "use jwt::Token;\n\npub fn verify_token(token: &str) -> Result<Claims> {{\n // JWT verification logic\n todo!()\n}}"
            )
            .unwrap();
        }

        workspace
    }

    /// `new` must populate every field with its documented default.
    #[test]
    fn test_config_defaults() {
        let defaults = RipgrepContextConfig::new("/tmp/test");

        assert_eq!(defaults.root_path, PathBuf::from("/tmp/test"));
        assert!(!defaults.include_patterns.is_empty());
        assert!(!defaults.exclude_patterns.is_empty());
        assert_eq!(defaults.max_file_size, 1024 * 1024);
        assert!(defaults.case_insensitive);
        assert_eq!(defaults.context_lines, 2);
    }

    /// Each `with_*` builder must overwrite exactly its own field.
    #[test]
    fn test_config_builders() {
        let includes = vec!["**/*.rs".to_string()];
        let excludes = vec!["**/test/**".to_string()];

        let customised = RipgrepContextConfig::new("/tmp")
            .with_include_patterns(includes)
            .with_exclude_patterns(excludes)
            .with_max_file_size(2048)
            .with_case_insensitive(false)
            .with_context_lines(5);

        assert_eq!(customised.include_patterns, vec!["**/*.rs"]);
        assert_eq!(customised.exclude_patterns, vec!["**/test/**"]);
        assert_eq!(customised.max_file_size, 2048);
        assert!(!customised.case_insensitive);
        assert_eq!(customised.context_lines, 5);
    }

    /// A query for text present in the workspace returns items containing it.
    #[tokio::test]
    async fn test_provider_search() {
        let workspace = setup_test_workspace();
        let provider = RipgrepContextProvider::new(RipgrepContextConfig::new(workspace.path()));

        let request = ContextQuery::new("Rust");
        let outcome = provider.query(&request).await.unwrap();

        assert_eq!(outcome.provider, "ripgrep");
        assert!(!outcome.items.is_empty());
        let mentions_rust = outcome
            .items
            .iter()
            .any(|item| item.content.contains("Rust"));
        assert!(mentions_rust);
    }

    /// With case-insensitive matching, an upper-case query still finds hits.
    #[tokio::test]
    async fn test_provider_case_insensitive() {
        let workspace = setup_test_workspace();
        let settings = RipgrepContextConfig::new(workspace.path()).with_case_insensitive(true);
        let provider = RipgrepContextProvider::new(settings);

        let request = ContextQuery::new("RUST");
        let outcome = provider.query(&request).await.unwrap();

        assert!(!outcome.items.is_empty());
    }

    /// `max_results` caps the number of items returned.
    #[tokio::test]
    async fn test_provider_max_results() {
        let workspace = setup_test_workspace();
        let provider = RipgrepContextProvider::new(RipgrepContextConfig::new(workspace.path()));

        let request = ContextQuery::new("fn").with_max_results(1);
        let outcome = provider.query(&request).await.unwrap();

        assert!(outcome.items.len() <= 1);
    }

    /// The provider identifies itself as "ripgrep".
    #[tokio::test]
    async fn test_provider_name() {
        let scratch = TempDir::new().unwrap();
        let provider = RipgrepContextProvider::new(RipgrepContextConfig::new(scratch.path()));

        assert_eq!(provider.name(), "ripgrep");
    }

    /// An empty pattern list falls back to the supplied default (`true`).
    #[test]
    fn test_matches_patterns_empty_default_true() {
        let candidate = Path::new("test.rs");
        assert!(matches_patterns(candidate, &[], true));
    }

    /// An empty pattern list falls back to the supplied default (`false`).
    #[test]
    fn test_matches_patterns_empty_default_false() {
        let candidate = Path::new("test.rs");
        assert!(!matches_patterns(candidate, &[], false));
    }

    /// A `**/*.rs` glob accepts Rust files and rejects other extensions.
    #[test]
    fn test_matches_patterns_include() {
        let rust_only = vec!["**/*.rs".to_string()];

        let rust_file = Path::new("src/main.rs");
        let python_file = Path::new("src/main.py");

        assert!(matches_patterns(rust_file, &rust_only, false));
        assert!(!matches_patterns(python_file, &rust_only, false));
    }
}