// aster/tools/search/ripgrep.rs

//! Ripgrep Integration Module
//!
//! Provides enhanced ripgrep support with vendored binary detection,
//! JSON output parsing, and file listing capabilities.
5
6use serde::{Deserialize, Serialize};
7use std::path::PathBuf;
8use std::process::Command;
9
/// Ripgrep version string.
///
/// NOTE(review): presumably the version of the vendored binary; nothing in
/// this file reads the constant, so confirm against the packaging scripts.
pub const RG_VERSION: &str = "14.1.0";

/// File name of the vendored ripgrep executable for the compilation target.
///
/// Exactly one definition is selected per target. Only macOS, Linux and
/// Windows targets define the constant; on macOS and Linux every
/// architecture other than `aarch64` maps to the `x64` binary name.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub const PLATFORM_BINARY: &str = "rg-darwin-arm64";

#[cfg(all(target_os = "macos", not(target_arch = "aarch64")))]
pub const PLATFORM_BINARY: &str = "rg-darwin-x64";

#[cfg(all(target_os = "linux", target_arch = "aarch64"))]
pub const PLATFORM_BINARY: &str = "rg-linux-arm64";

#[cfg(all(target_os = "linux", not(target_arch = "aarch64")))]
pub const PLATFORM_BINARY: &str = "rg-linux-x64";

#[cfg(target_os = "windows")]
pub const PLATFORM_BINARY: &str = "rg-win32-x64.exe";
30
/// Options describing a single ripgrep invocation.
///
/// Every field defaults to "off"/empty via `Default`, so callers normally
/// use struct-update syntax and set only what they need.
#[derive(Debug, Clone, Default)]
pub struct RipgrepOptions {
    /// Working directory for the spawned ripgrep process, when set.
    pub cwd: Option<PathBuf>,
    /// Search pattern; passed after a `--` separator so it is never read as a flag.
    pub pattern: String,
    /// Paths to search; when empty, `.` is searched.
    pub paths: Vec<PathBuf>,
    /// Value for `--glob`.
    pub glob: Option<String>,
    /// Value for `--type`.
    pub file_type: Option<String>,
    /// Adds `-i`.
    pub ignore_case: bool,
    /// Adds `-F`.
    pub fixed_strings: bool,
    /// Value for `--max-count`.
    pub max_count: Option<usize>,
    /// Value for `-C`; when set, `before_context` and `after_context` are ignored.
    pub context: Option<usize>,
    /// Value for `-B` (only used when `context` is `None`).
    pub before_context: Option<usize>,
    /// Value for `-A` (only used when `context` is `None`).
    pub after_context: Option<usize>,
    /// Adds `--files-with-matches`.
    pub files_with_matches: bool,
    /// Adds `--count`.
    pub count: bool,
    /// Adds `--json`; when false, `--line-number --column` is used instead.
    pub json: bool,
    /// Adds `--no-ignore`.
    pub no_ignore: bool,
    /// Adds `--hidden`.
    pub hidden: bool,
    /// Adds `-U --multiline-dotall`.
    pub multiline: bool,
    /// Timeout for the search.
    ///
    /// NOTE(review): no function in this file reads this field, and its unit
    /// is not established here; confirm with callers before relying on it.
    pub timeout: Option<u64>,
}
53
54/// A single ripgrep match
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct RipgrepMatch {
57    pub path: String,
58    pub line_number: usize,
59    pub line_content: String,
60    pub match_start: usize,
61    pub match_end: usize,
62}
63
64/// Ripgrep search result
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct RipgrepResult {
67    pub matches: Vec<RipgrepMatch>,
68    pub files_searched: usize,
69    pub match_count: usize,
70    pub truncated: bool,
71}
72
73/// Get vendored ripgrep path based on platform
74pub fn get_vendored_rg_path() -> Option<PathBuf> {
75    // Check multiple possible locations
76    let possible_paths = [
77        // Package vendor directory
78        PathBuf::from("vendor/ripgrep").join(PLATFORM_BINARY),
79        // Home directory
80        dirs::home_dir()
81            .map(|h| h.join(".aster/bin").join(PLATFORM_BINARY))
82            .unwrap_or_default(),
83        // Current executable directory
84        std::env::current_exe()
85            .ok()
86            .and_then(|p| {
87                p.parent()
88                    .map(|p| p.join("vendor/ripgrep").join(PLATFORM_BINARY))
89            })
90            .unwrap_or_default(),
91    ];
92
93    for rg_path in &possible_paths {
94        if rg_path.exists() {
95            return Some(rg_path.clone());
96        }
97    }
98
99    None
100}
101
/// Find `rg` on the system `PATH`.
///
/// Runs `where rg` on Windows and `which rg` elsewhere. When the lookup
/// command exits successfully, the first line of its output (trimmed) is
/// returned as a path. Returns `None` if the command cannot be started,
/// exits unsuccessfully, or prints nothing.
pub fn get_system_rg_path() -> Option<PathBuf> {
    let locator = if cfg!(target_os = "windows") {
        "where"
    } else {
        "which"
    };

    let lookup = Command::new(locator).arg("rg").output().ok()?;
    if !lookup.status.success() {
        return None;
    }

    // The locator may print several hits; only the first one is used.
    let listing = String::from_utf8_lossy(&lookup.stdout);
    let first_hit = listing.lines().next()?;
    Some(PathBuf::from(first_hit.trim()))
}
121
/// Decide whether the system ripgrep should be preferred over the vendored one.
///
/// Reads the `USE_BUILTIN_RIPGREP` environment variable. The system binary is
/// preferred only when the variable is explicitly set to a falsy value
/// (`0`, `false`, `no` or `off`, case-insensitive, surrounding whitespace
/// ignored). When the variable is unset, not valid Unicode, or holds any other
/// value, this returns `false` and the built-in (vendored) binary stays the
/// preferred choice.
///
/// Previously a truthy value selected the system binary, which contradicted
/// the variable's name: `USE_BUILTIN_RIPGREP=1` turned the built-in binary
/// off and `USE_BUILTIN_RIPGREP=0` had no effect.
fn should_use_system_ripgrep() -> bool {
    std::env::var("USE_BUILTIN_RIPGREP")
        .map(|raw| {
            matches!(
                raw.trim().to_lowercase().as_str(),
                "0" | "false" | "no" | "off"
            )
        })
        .unwrap_or(false)
}
128
129/// Get available ripgrep path
130/// Respects USE_BUILTIN_RIPGREP environment variable
131pub fn get_rg_path() -> Option<PathBuf> {
132    if should_use_system_ripgrep() {
133        // Prefer system version when env var is set
134        get_system_rg_path().or_else(get_vendored_rg_path)
135    } else {
136        // Default: prefer vendored version
137        get_vendored_rg_path().or_else(get_system_rg_path)
138    }
139}
140
141/// Check if ripgrep is available
142pub fn is_ripgrep_available() -> bool {
143    get_rg_path().is_some()
144}
145
146/// Get ripgrep version
147pub fn get_ripgrep_version() -> Option<String> {
148    let rg_path = get_rg_path()?;
149
150    let output = Command::new(&rg_path).arg("--version").output().ok()?;
151
152    if !output.status.success() {
153        return None;
154    }
155
156    let version_str = String::from_utf8_lossy(&output.stdout);
157    // Parse "ripgrep X.Y.Z" format
158    version_str
159        .lines()
160        .next()
161        .and_then(|line| line.split_whitespace().nth(1).map(|v| v.to_string()))
162}
163
164/// Build ripgrep command arguments
165fn build_rg_args(options: &RipgrepOptions) -> Vec<String> {
166    let mut args = Vec::new();
167
168    // Fixed strings mode
169    if options.fixed_strings {
170        args.push("-F".to_string());
171    }
172
173    // Case insensitive
174    if options.ignore_case {
175        args.push("-i".to_string());
176    }
177
178    // Multiline mode
179    if options.multiline {
180        args.push("-U".to_string());
181        args.push("--multiline-dotall".to_string());
182    }
183
184    // Output format
185    if options.json {
186        args.push("--json".to_string());
187    } else {
188        args.push("--line-number".to_string());
189        args.push("--column".to_string());
190    }
191
192    // Glob filter
193    if let Some(ref glob) = options.glob {
194        args.push("--glob".to_string());
195        args.push(glob.clone());
196    }
197
198    // File type filter
199    if let Some(ref file_type) = options.file_type {
200        args.push("--type".to_string());
201        args.push(file_type.clone());
202    }
203
204    // Ignore settings
205    if options.no_ignore {
206        args.push("--no-ignore".to_string());
207    }
208
209    if options.hidden {
210        args.push("--hidden".to_string());
211    }
212
213    // Max count
214    if let Some(max) = options.max_count {
215        args.push("--max-count".to_string());
216        args.push(max.to_string());
217    }
218
219    // Files with matches only
220    if options.files_with_matches {
221        args.push("--files-with-matches".to_string());
222    }
223
224    // Count mode
225    if options.count {
226        args.push("--count".to_string());
227    }
228
229    // Context lines
230    if let Some(ctx) = options.context {
231        args.push("-C".to_string());
232        args.push(ctx.to_string());
233    } else {
234        if let Some(before) = options.before_context {
235            args.push("-B".to_string());
236            args.push(before.to_string());
237        }
238        if let Some(after) = options.after_context {
239            args.push("-A".to_string());
240            args.push(after.to_string());
241        }
242    }
243
244    // Pattern (use -- to separate from paths)
245    args.push("--".to_string());
246    args.push(options.pattern.clone());
247
248    // Search paths
249    if options.paths.is_empty() {
250        args.push(".".to_string());
251    } else {
252        for path in &options.paths {
253            args.push(path.display().to_string());
254        }
255    }
256
257    args
258}
259
260/// JSON output types from ripgrep
261#[derive(Debug, Deserialize)]
262#[serde(tag = "type", rename_all = "lowercase")]
263enum RgJsonMessage {
264    Begin {
265        data: RgBeginData,
266    },
267    Match {
268        data: RgMatchData,
269    },
270    End {
271        data: RgEndData,
272    },
273    Summary {
274        data: RgSummaryData,
275    },
276    #[serde(other)]
277    Other,
278}
279
280#[derive(Debug, Deserialize)]
281struct RgBeginData {
282    path: RgPath,
283}
284
285#[derive(Debug, Deserialize)]
286struct RgMatchData {
287    path: RgPath,
288    lines: RgLines,
289    line_number: usize,
290    submatches: Vec<RgSubmatch>,
291}
292
293#[derive(Debug, Deserialize)]
294struct RgEndData {
295    path: RgPath,
296    stats: Option<RgStats>,
297}
298
299#[derive(Debug, Deserialize)]
300struct RgSummaryData {
301    stats: RgStats,
302}
303
304#[derive(Debug, Deserialize)]
305struct RgPath {
306    text: String,
307}
308
309#[derive(Debug, Deserialize)]
310struct RgLines {
311    text: String,
312}
313
314#[derive(Debug, Deserialize)]
315struct RgSubmatch {
316    start: usize,
317    end: usize,
318}
319
320#[derive(Debug, Deserialize)]
321struct RgStats {
322    matched_lines: Option<usize>,
323    matches: Option<usize>,
324}
325
326/// Parse JSON output from ripgrep
327fn parse_json_output(output: &str) -> RipgrepResult {
328    let mut matches = Vec::new();
329    let mut files = std::collections::HashSet::new();
330    let mut match_count = 0;
331
332    for line in output.lines() {
333        if line.is_empty() {
334            continue;
335        }
336
337        if let Ok(RgJsonMessage::Match { data }) = serde_json::from_str::<RgJsonMessage>(line) {
338            files.insert(data.path.text.clone());
339
340            for submatch in &data.submatches {
341                matches.push(RipgrepMatch {
342                    path: data.path.text.clone(),
343                    line_number: data.line_number,
344                    line_content: data.lines.text.trim_end_matches('\n').to_string(),
345                    match_start: submatch.start,
346                    match_end: submatch.end,
347                });
348                match_count += 1;
349            }
350        }
351    }
352
353    RipgrepResult {
354        matches,
355        files_searched: files.len(),
356        match_count,
357        truncated: false,
358    }
359}
360
361/// Execute ripgrep search asynchronously
362pub async fn search(options: RipgrepOptions) -> Result<RipgrepResult, String> {
363    let rg_path = get_rg_path().ok_or("ripgrep is not available")?;
364
365    let mut search_options = options.clone();
366    search_options.json = true;
367
368    let args = build_rg_args(&search_options);
369
370    let mut cmd = tokio::process::Command::new(&rg_path);
371    cmd.args(&args);
372
373    if let Some(ref cwd) = options.cwd {
374        cmd.current_dir(cwd);
375    }
376
377    let output = cmd
378        .output()
379        .await
380        .map_err(|e| format!("Failed to execute ripgrep: {}", e))?;
381
382    // ripgrep returns 1 when no matches found, which is not an error
383    if !output.status.success() && output.status.code() != Some(1) {
384        let stderr = String::from_utf8_lossy(&output.stderr);
385        return Err(format!("ripgrep failed: {}", stderr));
386    }
387
388    let stdout = String::from_utf8_lossy(&output.stdout);
389    Ok(parse_json_output(&stdout))
390}
391
392/// Execute ripgrep search synchronously
393pub fn search_sync(options: &RipgrepOptions) -> Result<String, String> {
394    let rg_path = get_rg_path().ok_or("ripgrep is not available")?;
395
396    let args = build_rg_args(options);
397
398    let mut cmd = Command::new(&rg_path);
399    cmd.args(&args);
400
401    if let Some(ref cwd) = options.cwd {
402        cmd.current_dir(cwd);
403    }
404
405    let output = cmd
406        .output()
407        .map_err(|e| format!("Failed to execute ripgrep: {}", e))?;
408
409    // ripgrep returns 1 when no matches found
410    if output.status.code() == Some(1) {
411        return Ok(String::new());
412    }
413
414    if !output.status.success() {
415        let stderr = String::from_utf8_lossy(&output.stderr);
416        return Err(format!("ripgrep failed: {}", stderr));
417    }
418
419    Ok(String::from_utf8_lossy(&output.stdout).to_string())
420}
421
422/// List files using ripgrep (rg --files)
423pub async fn list_files(options: ListFilesOptions) -> Result<Vec<String>, String> {
424    let rg_path = get_rg_path().ok_or("ripgrep is not available")?;
425
426    let mut args = vec!["--files".to_string()];
427
428    if let Some(ref glob) = options.glob {
429        args.push("--glob".to_string());
430        args.push(glob.clone());
431    }
432
433    if let Some(ref file_type) = options.file_type {
434        args.push("--type".to_string());
435        args.push(file_type.clone());
436    }
437
438    if options.hidden {
439        args.push("--hidden".to_string());
440    }
441
442    if options.no_ignore {
443        args.push("--no-ignore".to_string());
444    }
445
446    let mut cmd = tokio::process::Command::new(&rg_path);
447    cmd.args(&args);
448
449    if let Some(ref cwd) = options.cwd {
450        cmd.current_dir(cwd);
451    }
452
453    let output = cmd
454        .output()
455        .await
456        .map_err(|e| format!("Failed to execute ripgrep: {}", e))?;
457
458    if !output.status.success() && output.status.code() != Some(1) {
459        let stderr = String::from_utf8_lossy(&output.stderr);
460        return Err(format!("ripgrep failed: {}", stderr));
461    }
462
463    let stdout = String::from_utf8_lossy(&output.stdout);
464    let files: Vec<String> = stdout
465        .lines()
466        .filter(|l| !l.is_empty())
467        .map(|l| l.to_string())
468        .collect();
469
470    Ok(files)
471}
472
/// Filters and settings for a `rg --files` listing.
///
/// All fields default to unset/`false`.
#[derive(Debug, Clone, Default)]
pub struct ListFilesOptions {
    /// Working directory for the spawned ripgrep process, when set.
    pub cwd: Option<PathBuf>,
    /// Value for `--glob`.
    pub glob: Option<String>,
    /// Value for `--type`.
    pub file_type: Option<String>,
    /// Adds `--hidden`.
    pub hidden: bool,
    /// Adds `--no-ignore`.
    pub no_ignore: bool,
}
482
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns true when `argv` contains `needle` as a whole argument.
    fn has_arg(argv: &[String], needle: &str) -> bool {
        argv.iter().any(|arg| arg == needle)
    }

    /// Builds the argument list for options that only differ from the default
    /// in the fields set by the caller.
    fn args_for(opts: RipgrepOptions) -> Vec<String> {
        build_rg_args(&opts)
    }

    #[test]
    fn test_ripgrep_options_default() {
        let defaults = RipgrepOptions::default();
        assert!(defaults.pattern.is_empty());
        assert!(defaults.paths.is_empty());
        assert!(!defaults.ignore_case);
        assert!(!defaults.json);
    }

    #[test]
    fn test_ripgrep_match_struct() {
        let hit = RipgrepMatch {
            path: "test.rs".to_string(),
            line_number: 10,
            line_content: "fn main()".to_string(),
            match_start: 3,
            match_end: 7,
        };
        assert_eq!(hit.path, "test.rs");
        assert_eq!(hit.line_number, 10);
    }

    #[test]
    fn test_ripgrep_result_struct() {
        let outcome = RipgrepResult {
            matches: vec![],
            files_searched: 5,
            match_count: 0,
            truncated: false,
        };
        assert_eq!(outcome.files_searched, 5);
        assert!(!outcome.truncated);
    }

    #[test]
    fn test_build_rg_args_basic() {
        let argv = args_for(RipgrepOptions {
            pattern: "test".to_string(),
            ..Default::default()
        });
        for expected in ["--", "test", "."].iter() {
            assert!(has_arg(&argv, expected));
        }
    }

    #[test]
    fn test_build_rg_args_with_options() {
        let argv = args_for(RipgrepOptions {
            pattern: "fn".to_string(),
            ignore_case: true,
            hidden: true,
            json: true,
            max_count: Some(10),
            ..Default::default()
        });
        for expected in ["-i", "--hidden", "--json", "--max-count"].iter() {
            assert!(has_arg(&argv, expected));
        }
    }

    #[test]
    fn test_build_rg_args_with_context() {
        let argv = args_for(RipgrepOptions {
            pattern: "test".to_string(),
            before_context: Some(2),
            after_context: Some(3),
            ..Default::default()
        });
        for expected in ["-B", "2", "-A", "3"].iter() {
            assert!(has_arg(&argv, expected));
        }
    }

    #[test]
    fn test_build_rg_args_with_paths() {
        let argv = args_for(RipgrepOptions {
            pattern: "test".to_string(),
            paths: vec![PathBuf::from("src"), PathBuf::from("tests")],
            ..Default::default()
        });
        assert!(has_arg(&argv, "src"));
        assert!(has_arg(&argv, "tests"));
        // Explicit paths replace the implicit "." search root.
        assert!(!has_arg(&argv, "."));
    }

    #[test]
    fn test_parse_json_output_empty() {
        let parsed = parse_json_output("");
        assert!(parsed.matches.is_empty());
        assert_eq!(parsed.files_searched, 0);
        assert_eq!(parsed.match_count, 0);
    }

    #[test]
    fn test_parse_json_output_with_match() {
        let json = r#"{"type":"match","data":{"path":{"text":"test.rs"},"lines":{"text":"fn main()\n"},"line_number":1,"submatches":[{"start":0,"end":2}]}}"#;
        let parsed = parse_json_output(json);
        assert_eq!(parsed.matches.len(), 1);
        let first = &parsed.matches[0];
        assert_eq!(first.path, "test.rs");
        assert_eq!(first.line_number, 1);
        assert_eq!(parsed.match_count, 1);
    }

    #[test]
    fn test_list_files_options_default() {
        let defaults = ListFilesOptions::default();
        assert!(defaults.cwd.is_none());
        assert!(defaults.glob.is_none());
        assert!(!defaults.hidden);
    }

    #[test]
    fn test_is_ripgrep_available() {
        // Smoke test: the lookup must complete without panicking, whatever
        // the environment provides.
        let _ = is_ripgrep_available();
    }

    #[test]
    fn test_get_ripgrep_version() {
        // Smoke test: the version query must complete without panicking,
        // whether or not ripgrep is installed.
        let _ = get_ripgrep_version();
    }
}
615}