context_creator/core/
search.rs

//! Core search functionality: a parallel directory walk (via the `ignore` crate) combined with line-by-line substring matching
2
3use anyhow::{Context, Result};
4use ignore::WalkBuilder;
5use std::fs::File;
6use std::io::{BufRead, BufReader};
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9
/// Configuration for search operations.
///
/// The struct only borrows its inputs, so it is cheap to copy and pass around.
#[derive(Debug, Clone, Copy)]
pub struct SearchConfig<'a> {
    /// Text to look for; it is matched as a plain substring of each line.
    pub pattern: &'a str,
    /// Path at which the directory walk starts.
    pub path: &'a Path,
    /// When `true`, both the pattern and each line are lowercased before comparing.
    pub case_insensitive: bool,
    /// Glob patterns added to the walker's override set as-is.
    pub include_globs: &'a [String],
    /// Glob patterns added to the walker's override set with a `!` prefix.
    pub exclude_globs: &'a [String],
}
18
/// Largest file, in bytes, that the search will read: 10 MiB.
/// `1 << 20` is one mebibyte (1024 * 1024 bytes).
const MAX_FILE_SIZE: u64 = 10 * (1 << 20);
21
22/// Find files containing matches for the given pattern using parallel search
23pub fn find_files_with_matches(config: &SearchConfig) -> Result<Vec<PathBuf>> {
24    // Build the walker with include/exclude patterns
25    let mut builder = WalkBuilder::new(config.path);
26
27    // Configure walker to respect gitignore and exclude hidden files
28    builder
29        .hidden(true) // Ignore hidden files (including .git)
30        .git_ignore(true) // Respect .gitignore
31        .git_global(true) // Respect global gitignore
32        .git_exclude(true) // Respect .git/info/exclude
33        .ignore(true) // Respect .ignore files
34        .parents(true); // Respect parent .gitignore files
35
36    // Enable parallel walking for maximum performance
37    builder.threads(num_cpus::get());
38
39    // Handle both include and exclude patterns using OverrideBuilder
40    if !config.include_globs.is_empty() || !config.exclude_globs.is_empty() {
41        let mut overrides = ignore::overrides::OverrideBuilder::new(config.path);
42
43        // If we have include patterns, add them
44        if !config.include_globs.is_empty() {
45            for pattern in config.include_globs {
46                overrides.add(pattern)?;
47            }
48        } else if !config.exclude_globs.is_empty() {
49            // If we only have exclude patterns, include everything first
50            overrides.add("**/*")?;
51        }
52
53        // Add exclude patterns with ! prefix
54        for pattern in config.exclude_globs {
55            let exclude_pattern = format!("!{pattern}");
56            overrides.add(&exclude_pattern)?;
57        }
58
59        builder.overrides(overrides.build()?);
60    }
61
62    // Prepare pattern for search (pre-compute lowercase version if needed)
63    let pattern_lower = if config.case_insensitive {
64        Some(config.pattern.to_lowercase())
65    } else {
66        None
67    };
68
69    // Thread-safe collection of matches
70    let matches = Arc::new(Mutex::new(Vec::new()));
71    let matches_clone = matches.clone();
72
73    // Build parallel walker
74    builder.build_parallel().run(|| {
75        let matches = matches_clone.clone();
76        let pattern = config.pattern;
77        let pattern_lower = pattern_lower.clone();
78
79        Box::new(move |entry| {
80            if let Ok(entry) = entry {
81                let path = entry.path();
82
83                // Skip directories
84                if path.is_dir() {
85                    return ignore::WalkState::Continue;
86                }
87
88                // Check if file should be searched
89                if should_search_file(path, pattern, pattern_lower.as_deref()) {
90                    matches.lock().unwrap().push(path.to_path_buf());
91                }
92            }
93            ignore::WalkState::Continue
94        })
95    });
96
97    // Extract results
98    let results = Arc::try_unwrap(matches)
99        .map(|mutex| mutex.into_inner().unwrap())
100        .unwrap_or_else(|arc| arc.lock().unwrap().clone());
101
102    Ok(results)
103}
104
105/// Check if a file contains the search pattern using streaming
106fn should_search_file(path: &Path, pattern: &str, pattern_lower: Option<&str>) -> bool {
107    // First check file size to prevent DoS
108    if let Ok(metadata) = path.metadata() {
109        if metadata.len() > MAX_FILE_SIZE {
110            return false;
111        }
112    }
113
114    // Open file for streaming search
115    let file = match File::open(path)
116        .with_context(|| format!("Failed to open file: {}", path.display()))
117    {
118        Ok(f) => f,
119        Err(_) => return false,
120    };
121
122    let reader = BufReader::new(file);
123
124    // Stream through file line by line
125    if let Some(pattern_lower) = pattern_lower {
126        // Case-insensitive search
127        for line in reader.lines().map_while(Result::ok) {
128            if line.to_lowercase().contains(pattern_lower) {
129                return true;
130            }
131        }
132    } else {
133        // Case-sensitive search using memchr for maximum performance
134        // For short patterns, use the fast substring search
135        if pattern.len() <= 32 {
136            for line in reader.lines().map_while(Result::ok) {
137                if line.contains(pattern) {
138                    return true;
139                }
140            }
141        } else {
142            // For longer patterns, use boyer-moore-like algorithm
143            for line in reader.lines().map_while(Result::ok) {
144                if fast_substring_search(&line, pattern) {
145                    return true;
146                }
147            }
148        }
149    }
150
151    false
152}
153
/// Return `true` when `needle` occurs somewhere inside `haystack`.
///
/// This is a thin wrapper over the standard library's substring search, so an
/// empty `needle` matches every haystack.
fn fast_substring_search(haystack: &str, needle: &str) -> bool {
    // Delegate to the standard library rather than hand-rolling a search.
    str::contains(haystack, needle)
}