Skip to main content

a3s_code_core/context/
ripgrep_provider.rs

1//! Ripgrep Context Provider
2//!
3//! Provides indexless, real-time code search using ripgrep-style pattern matching.
4//! Unlike vector-based RAG, this approach:
5//! - Requires no pre-indexing or embedding generation
6//! - Works directly on raw source files
7//! - Supports real-time code changes without re-indexing
8//! - Uses fast regex-based search with relevance scoring
9
10use crate::context::{ContextItem, ContextProvider, ContextQuery, ContextResult, ContextType};
11use async_trait::async_trait;
12use ignore::WalkBuilder;
13use regex::Regex;
14use std::fs;
15use std::path::{Path, PathBuf};
16
/// Settings controlling which files the ripgrep provider scans and how matches
/// are reported.
#[derive(Debug, Clone)]
pub struct RipgrepContextConfig {
    /// Directory at which the file walk starts.
    pub root_path: PathBuf,
    /// Globs a file must match to be searched (e.g. `["**/*.rs", "**/*.md"]`).
    pub include_patterns: Vec<String>,
    /// Globs that remove a file from the search
    /// (e.g. `["**/target/**", "**/node_modules/**"]`).
    pub exclude_patterns: Vec<String>,
    /// Largest file, in bytes, that will be read (`new` sets 1 MiB).
    pub max_file_size: usize,
    /// Whether matching ignores letter case (`new` sets `true`).
    pub case_insensitive: bool,
    /// How many lines of context to keep on each side of a match (`new` sets 2).
    pub context_lines: usize,
}

impl RipgrepContextConfig {
    /// Build a configuration rooted at `root_path` with the stock include and
    /// exclude globs, a 1 MiB size cap, case-insensitive matching and two
    /// lines of context.
    pub fn new(root_path: impl Into<PathBuf>) -> Self {
        // Source, documentation and config file types searched by default.
        const DEFAULT_INCLUDES: [&str; 17] = [
            "**/*.rs",
            "**/*.py",
            "**/*.ts",
            "**/*.tsx",
            "**/*.js",
            "**/*.jsx",
            "**/*.go",
            "**/*.java",
            "**/*.c",
            "**/*.cpp",
            "**/*.h",
            "**/*.hpp",
            "**/*.md",
            "**/*.toml",
            "**/*.yaml",
            "**/*.yml",
            "**/*.json",
        ];
        // Build output, dependency and VCS directories skipped by default.
        const DEFAULT_EXCLUDES: [&str; 8] = [
            "**/target/**",
            "**/node_modules/**",
            "**/.git/**",
            "**/dist/**",
            "**/build/**",
            "**/*.lock",
            "**/vendor/**",
            "**/__pycache__/**",
        ];

        Self {
            root_path: root_path.into(),
            include_patterns: DEFAULT_INCLUDES.iter().map(|&g| String::from(g)).collect(),
            exclude_patterns: DEFAULT_EXCLUDES.iter().map(|&g| String::from(g)).collect(),
            max_file_size: 1024 * 1024, // 1MB
            case_insensitive: true,
            context_lines: 2,
        }
    }

    /// Replace the include globs.
    pub fn with_include_patterns(self, patterns: Vec<String>) -> Self {
        Self {
            include_patterns: patterns,
            ..self
        }
    }

    /// Replace the exclude globs.
    pub fn with_exclude_patterns(self, patterns: Vec<String>) -> Self {
        Self {
            exclude_patterns: patterns,
            ..self
        }
    }

    /// Replace the maximum file size (in bytes).
    pub fn with_max_file_size(self, size: usize) -> Self {
        Self {
            max_file_size: size,
            ..self
        }
    }

    /// Turn case-insensitive matching on or off.
    pub fn with_case_insensitive(self, enabled: bool) -> Self {
        Self {
            case_insensitive: enabled,
            ..self
        }
    }

    /// Replace the number of context lines kept around each match.
    pub fn with_context_lines(self, lines: usize) -> Self {
        Self {
            context_lines: lines,
            ..self
        }
    }
}
104
105/// Search result from a single file
106#[derive(Debug, Clone)]
107struct FileMatch {
108    path: PathBuf,
109    matches: Vec<MatchResult>,
110    relevance: f32,
111}
112
/// One matching line plus the lines immediately surrounding it.
#[derive(Debug, Clone)]
struct MatchResult {
    /// 1-based line number of the matching line.
    line_number: usize,
    /// Text of the matching line.
    line_content: String,
    /// Lines directly preceding the match, in file order.
    context_before: Vec<String>,
    /// Lines directly following the match, in file order.
    context_after: Vec<String>,
}
121
122/// Ripgrep context provider
123pub struct RipgrepContextProvider {
124    config: RipgrepContextConfig,
125}
126
127impl RipgrepContextProvider {
128    /// Create a new ripgrep context provider
129    pub fn new(config: RipgrepContextConfig) -> Self {
130        Self { config }
131    }
132
133    /// Search files for pattern matches
134    async fn search_files(
135        &self,
136        query: &str,
137        max_results: usize,
138    ) -> anyhow::Result<Vec<FileMatch>> {
139        let root = self.config.root_path.clone();
140        let max_file_size = self.config.max_file_size;
141        let include = self.config.include_patterns.clone();
142        let exclude = self.config.exclude_patterns.clone();
143        let case_insensitive = self.config.case_insensitive;
144        let context_lines = self.config.context_lines;
145        let query = query.to_string();
146
147        // Run search in blocking task
148        tokio::task::spawn_blocking(move || {
149            // Build regex pattern
150            let pattern = if case_insensitive {
151                format!("(?i){}", regex::escape(&query))
152            } else {
153                regex::escape(&query)
154            };
155
156            let regex = Regex::new(&pattern)?;
157
158            let mut file_matches = Vec::new();
159
160            let walker = WalkBuilder::new(&root)
161                .hidden(false)
162                .git_ignore(true)
163                .build();
164
165            for entry in walker {
166                let entry = entry.map_err(|e| anyhow::anyhow!("Walk error: {}", e))?;
167                let path = entry.path();
168
169                if !path.is_file() {
170                    continue;
171                }
172
173                let metadata = fs::metadata(path)
174                    .map_err(|e| anyhow::anyhow!("Metadata error for {}: {}", path.display(), e))?;
175
176                if metadata.len() > max_file_size as u64 {
177                    continue;
178                }
179
180                if !matches_patterns(path, &include, true) {
181                    continue;
182                }
183
184                if matches_patterns(path, &exclude, false) {
185                    continue;
186                }
187
188                let content = match fs::read_to_string(path) {
189                    Ok(c) => c,
190                    Err(_) => continue, // Skip binary files
191                };
192
193                if content.trim().is_empty() {
194                    continue;
195                }
196
197                // Search for matches in this file
198                let lines: Vec<&str> = content.lines().collect();
199                let mut matches = Vec::new();
200
201                for (line_idx, line) in lines.iter().enumerate() {
202                    if regex.is_match(line) {
203                        let context_before = if line_idx >= context_lines {
204                            lines[line_idx - context_lines..line_idx]
205                                .iter()
206                                .map(|s| s.to_string())
207                                .collect()
208                        } else {
209                            lines[0..line_idx].iter().map(|s| s.to_string()).collect()
210                        };
211
212                        let context_after = if line_idx + context_lines < lines.len() {
213                            lines[line_idx + 1..=line_idx + context_lines]
214                                .iter()
215                                .map(|s| s.to_string())
216                                .collect()
217                        } else {
218                            lines[line_idx + 1..]
219                                .iter()
220                                .map(|s| s.to_string())
221                                .collect()
222                        };
223
224                        matches.push(MatchResult {
225                            line_number: line_idx + 1,
226                            line_content: line.to_string(),
227                            context_before,
228                            context_after,
229                        });
230                    }
231                }
232
233                if !matches.is_empty() {
234                    // Calculate relevance score based on match count and file size
235                    let relevance = (matches.len() as f32) / (lines.len() as f32).sqrt();
236
237                    file_matches.push(FileMatch {
238                        path: path.to_path_buf(),
239                        matches,
240                        relevance,
241                    });
242                }
243            }
244
245            // Sort by relevance (descending)
246            file_matches.sort_by(|a, b| {
247                b.relevance
248                    .partial_cmp(&a.relevance)
249                    .unwrap_or(std::cmp::Ordering::Equal)
250            });
251            file_matches.truncate(max_results);
252
253            Ok::<_, anyhow::Error>(file_matches)
254        })
255        .await
256        .map_err(|e| anyhow::anyhow!("Spawn blocking failed: {}", e))?
257    }
258
259    /// Format match results into context content
260    fn format_match(&self, file_match: &FileMatch, depth: &crate::context::ContextDepth) -> String {
261        let mut output = String::new();
262        let path_str = file_match.path.display().to_string();
263
264        match depth {
265            crate::context::ContextDepth::Abstract => {
266                // Just show file path and match count
267                output.push_str(&format!(
268                    "{}: {} matches\n",
269                    path_str,
270                    file_match.matches.len()
271                ));
272            }
273            crate::context::ContextDepth::Overview => {
274                // Show first few matches with limited context
275                output.push_str(&format!("{}:\n", path_str));
276                for (idx, m) in file_match.matches.iter().take(3).enumerate() {
277                    if idx > 0 {
278                        output.push('\n');
279                    }
280                    output.push_str(&format!("  Line {}:\n", m.line_number));
281                    output.push_str(&format!("    {}\n", m.line_content));
282                }
283                if file_match.matches.len() > 3 {
284                    output.push_str(&format!(
285                        "  ... and {} more matches\n",
286                        file_match.matches.len() - 3
287                    ));
288                }
289            }
290            crate::context::ContextDepth::Full => {
291                // Show all matches with full context
292                output.push_str(&format!("{}:\n", path_str));
293                for (idx, m) in file_match.matches.iter().enumerate() {
294                    if idx > 0 {
295                        output.push('\n');
296                    }
297                    output.push_str(&format!("  Line {}:\n", m.line_number));
298                    for ctx in &m.context_before {
299                        output.push_str(&format!("    {}\n", ctx));
300                    }
301                    output.push_str(&format!("  > {}\n", m.line_content));
302                    for ctx in &m.context_after {
303                        output.push_str(&format!("    {}\n", ctx));
304                    }
305                }
306            }
307        }
308
309        output
310    }
311}
312
313#[async_trait]
314impl ContextProvider for RipgrepContextProvider {
315    fn name(&self) -> &str {
316        "ripgrep"
317    }
318
319    async fn query(&self, query: &ContextQuery) -> anyhow::Result<ContextResult> {
320        let file_matches = self.search_files(&query.query, query.max_results).await?;
321
322        let mut result = ContextResult::new("ripgrep");
323        let mut total_tokens = 0usize;
324
325        for file_match in file_matches {
326            if total_tokens >= query.max_tokens {
327                result.truncated = true;
328                break;
329            }
330
331            let content = self.format_match(&file_match, &query.depth);
332            let token_count = content.split_whitespace().count();
333
334            if total_tokens + token_count > query.max_tokens {
335                result.truncated = true;
336                break;
337            }
338
339            total_tokens += token_count;
340
341            result.add_item(
342                ContextItem::new(
343                    file_match.path.to_string_lossy().to_string(),
344                    ContextType::Resource,
345                    content,
346                )
347                .with_token_count(token_count)
348                .with_relevance(file_match.relevance)
349                .with_source(format!("file:{}", file_match.path.display()))
350                .with_metadata("match_count", serde_json::json!(file_match.matches.len())),
351            );
352        }
353
354        Ok(result)
355    }
356}
357
358// ============================================================================
359// Helpers
360// ============================================================================
361
362/// Check if a path matches any of the given glob patterns
363fn matches_patterns(path: &Path, patterns: &[String], default_if_empty: bool) -> bool {
364    if patterns.is_empty() {
365        return default_if_empty;
366    }
367    let path_str = path.to_string_lossy().replace('\\', "/");
368    patterns.iter().any(|pattern| {
369        glob::Pattern::new(pattern)
370            .map(|p| p.matches(&path_str))
371            .unwrap_or(false)
372    })
373}
374
375// ============================================================================
376// Tests
377// ============================================================================
378
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::TempDir;

    /// Build a throwaway workspace holding three top-level files
    /// (`main.rs`, `lib.rs`, `README.md`) and one nested file (`src/auth.rs`).
    fn setup_test_workspace() -> TempDir {
        let workspace = TempDir::new().unwrap();
        let base = workspace.path();

        let mut main_rs = File::create(base.join("main.rs")).unwrap();
        writeln!(main_rs, "fn main() {{\n    println!(\"Hello, world!\");\n}}").unwrap();

        let mut lib_rs = File::create(base.join("lib.rs")).unwrap();
        writeln!(
            lib_rs,
            "pub mod auth;\npub mod database;\n\npub fn init() -> Result<()> {{\n    Ok(())\n}}"
        )
        .unwrap();

        let mut readme = File::create(base.join("README.md")).unwrap();
        writeln!(
            readme,
            "# My Project\n\nA Rust project for testing ripgrep context."
        )
        .unwrap();

        std::fs::create_dir(base.join("src")).unwrap();
        let mut auth_rs = File::create(base.join("src/auth.rs")).unwrap();
        writeln!(
            auth_rs,
            "use jwt::Token;\n\npub fn verify_token(token: &str) -> Result<Claims> {{\n    // JWT verification logic\n    todo!()\n}}"
        )
        .unwrap();

        workspace
    }

    /// Provider with the default configuration, rooted at `root`.
    fn default_provider(root: &Path) -> RipgrepContextProvider {
        RipgrepContextProvider::new(RipgrepContextConfig::new(root))
    }

    #[test]
    fn test_config_defaults() {
        let defaults = RipgrepContextConfig::new("/tmp/test");

        assert_eq!(defaults.root_path, PathBuf::from("/tmp/test"));
        assert!(!defaults.include_patterns.is_empty());
        assert!(!defaults.exclude_patterns.is_empty());
        assert_eq!(defaults.max_file_size, 1024 * 1024);
        assert!(defaults.case_insensitive);
        assert_eq!(defaults.context_lines, 2);
    }

    #[test]
    fn test_config_builders() {
        let base = RipgrepContextConfig::new("/tmp");
        let tuned = base
            .with_include_patterns(vec!["**/*.rs".to_string()])
            .with_exclude_patterns(vec!["**/test/**".to_string()])
            .with_max_file_size(2048)
            .with_case_insensitive(false)
            .with_context_lines(5);

        assert_eq!(tuned.include_patterns, vec!["**/*.rs"]);
        assert_eq!(tuned.exclude_patterns, vec!["**/test/**"]);
        assert_eq!(tuned.max_file_size, 2048);
        assert!(!tuned.case_insensitive);
        assert_eq!(tuned.context_lines, 5);
    }

    #[tokio::test]
    async fn test_provider_search() {
        let workspace = setup_test_workspace();
        let provider = default_provider(workspace.path());

        let request = ContextQuery::new("Rust");
        let response = provider.query(&request).await.unwrap();

        assert_eq!(response.provider, "ripgrep");
        assert!(!response.items.is_empty());
        // "Rust" appears in README.md, so at least one item must contain it.
        let mentions_rust = response
            .items
            .iter()
            .any(|item| item.content.contains("Rust"));
        assert!(mentions_rust);
    }

    #[tokio::test]
    async fn test_provider_case_insensitive() {
        let workspace = setup_test_workspace();
        let config = RipgrepContextConfig::new(workspace.path()).with_case_insensitive(true);
        let provider = RipgrepContextProvider::new(config);

        // The workspace only contains "Rust", never "RUST".
        let request = ContextQuery::new("RUST");
        let response = provider.query(&request).await.unwrap();

        assert!(!response.items.is_empty());
    }

    #[tokio::test]
    async fn test_provider_max_results() {
        let workspace = setup_test_workspace();
        let provider = default_provider(workspace.path());

        let request = ContextQuery::new("fn").with_max_results(1);
        let response = provider.query(&request).await.unwrap();

        assert!(response.items.len() <= 1);
    }

    #[tokio::test]
    async fn test_provider_name() {
        let workspace = TempDir::new().unwrap();
        let provider = default_provider(workspace.path());

        assert_eq!(provider.name(), "ripgrep");
    }

    #[test]
    fn test_matches_patterns_empty_default_true() {
        let no_patterns: &[String] = &[];
        assert!(matches_patterns(Path::new("test.rs"), no_patterns, true));
    }

    #[test]
    fn test_matches_patterns_empty_default_false() {
        let no_patterns: &[String] = &[];
        assert!(!matches_patterns(Path::new("test.rs"), no_patterns, false));
    }

    #[test]
    fn test_matches_patterns_include() {
        let rust_only = vec!["**/*.rs".to_string()];

        assert!(matches_patterns(Path::new("src/main.rs"), &rust_only, false));
        assert!(!matches_patterns(Path::new("src/main.py"), &rust_only, false));
    }
}