Skip to main content

codelens_engine/file_ops/
reader.rs

1use crate::project::{ProjectRoot, is_excluded};
2use anyhow::{Context, Result, bail};
3use regex::Regex;
4use std::fs;
5use std::path::PathBuf;
6use walkdir::WalkDir;
7
8use super::{
9    DirectoryEntry, FileMatch, FileReadResult, FlatSymbol, PatternMatch, SmartPatternMatch,
10    compile_glob, find_enclosing_symbol, flatten_to_ranges, to_directory_entry,
11};
12
/// Upper bound, in bytes, on the size of a file `read_file` will load (10 MB).
///
/// `read_file` compares the on-disk length against this limit before reading,
/// so oversized files are rejected instead of being pulled into memory.
const MAX_READ_SIZE: u64 = 10 * 1_048_576;
15
16pub fn read_file(
17    project: &ProjectRoot,
18    path: &str,
19    start_line: Option<usize>,
20    end_line: Option<usize>,
21) -> Result<FileReadResult> {
22    let resolved = project.resolve(path)?;
23    if !resolved.is_file() {
24        bail!("not a file: {}", resolved.display());
25    }
26    let meta = fs::metadata(&resolved)?;
27    if meta.len() > MAX_READ_SIZE {
28        bail!(
29            "file too large ({:.1} MB > {} MB limit): {}",
30            meta.len() as f64 / 1_048_576.0,
31            MAX_READ_SIZE / 1_048_576,
32            resolved.display()
33        );
34    }
35
36    let content = fs::read_to_string(&resolved)
37        .with_context(|| format!("failed to read {}", resolved.display()))?;
38    let lines: Vec<&str> = content.lines().collect();
39    let total_lines = lines.len();
40    let start = start_line.unwrap_or(0).min(total_lines);
41    let end = end_line.unwrap_or(total_lines).clamp(start, total_lines);
42
43    Ok(FileReadResult {
44        file_path: project.to_relative(&resolved),
45        total_lines,
46        content: lines[start..end].join("\n"),
47    })
48}
49
50pub fn list_dir(project: &ProjectRoot, path: &str, recursive: bool) -> Result<Vec<DirectoryEntry>> {
51    let resolved = project.resolve(path)?;
52    if !resolved.is_dir() {
53        bail!("not a directory: {}", resolved.display());
54    }
55
56    let mut entries = Vec::new();
57    if recursive {
58        for entry in WalkDir::new(&resolved)
59            .min_depth(1)
60            .into_iter()
61            .filter_entry(|entry| !is_excluded(entry.path()))
62        {
63            let entry = entry?;
64            entries.push(to_directory_entry(project, entry.path())?);
65        }
66    } else {
67        for entry in fs::read_dir(&resolved)? {
68            let entry = entry?;
69            entries.push(to_directory_entry(project, &entry.path())?);
70        }
71    }
72
73    entries.sort_by(|a, b| a.path.cmp(&b.path));
74    Ok(entries)
75}
76
77pub fn find_files(
78    project: &ProjectRoot,
79    wildcard_pattern: &str,
80    relative_dir: Option<&str>,
81) -> Result<Vec<FileMatch>> {
82    let base = match relative_dir {
83        Some(path) => project.resolve(path)?,
84        None => project.as_path().to_path_buf(),
85    };
86    if !base.is_dir() {
87        bail!("not a directory: {}", base.display());
88    }
89
90    let matcher = compile_glob(wildcard_pattern)?;
91    let mut matches = Vec::new();
92
93    for entry in WalkDir::new(&base)
94        .into_iter()
95        .filter_entry(|entry| !is_excluded(entry.path()))
96    {
97        let entry = entry?;
98        if entry.file_type().is_file() {
99            let rel = project.to_relative(entry.path());
100            if !matcher.is_match(entry.file_name()) && !matcher.is_match(rel.as_str()) {
101                continue;
102            }
103            matches.push(FileMatch { path: rel });
104        }
105    }
106
107    matches.sort_by(|a, b| a.path.cmp(&b.path));
108    Ok(matches)
109}
110
/// Number of candidate files at which `search_for_pattern` switches from a
/// sequential scan to rayon's parallel iterator.
///
/// Below this count the thread-pool overhead is not considered worth paying.
const PARALLEL_FILE_THRESHOLD: usize = 200;
113
114pub fn search_for_pattern(
115    project: &ProjectRoot,
116    pattern: &str,
117    file_glob: Option<&str>,
118    max_results: usize,
119    context_lines_before: usize,
120    context_lines_after: usize,
121) -> Result<Vec<PatternMatch>> {
122    let regex = Regex::new(pattern).with_context(|| format!("invalid regex: {pattern}"))?;
123    let matcher = match file_glob {
124        Some(glob) => Some(compile_glob(glob)?),
125        None => None,
126    };
127
128    // Collect candidate file paths first (WalkDir is not Send)
129    let mut files: Vec<PathBuf> = Vec::new();
130    for entry in WalkDir::new(project.as_path())
131        .into_iter()
132        .filter_entry(|entry| !is_excluded(entry.path()))
133    {
134        let entry = entry?;
135        if !entry.file_type().is_file() {
136            continue;
137        }
138        if let Some(matcher) = &matcher {
139            let rel = project.to_relative(entry.path());
140            if !matcher.is_match(entry.file_name()) && !matcher.is_match(rel.as_str()) {
141                continue;
142            }
143        }
144        files.push(entry.into_path());
145    }
146
147    // Search each file for pattern matches
148    let search_file = |path: &PathBuf| -> Vec<PatternMatch> {
149        let content = match fs::read_to_string(path) {
150            Ok(c) => c,
151            Err(_) => return Vec::new(),
152        };
153        let rel = project.to_relative(path);
154        let lines: Vec<&str> = content.lines().collect();
155        let mut file_matches = Vec::new();
156        for (index, line) in lines.iter().enumerate() {
157            if let Some(found) = regex.find(line) {
158                let before_start = index.saturating_sub(context_lines_before);
159                let after_end = (index + 1 + context_lines_after).min(lines.len());
160                file_matches.push(PatternMatch {
161                    file_path: rel.clone(),
162                    line: index + 1,
163                    column: found.start() + 1,
164                    matched_text: found.as_str().to_owned(),
165                    line_content: line.trim().to_owned(),
166                    context_before: lines[before_start..index]
167                        .iter()
168                        .map(|l| l.to_string())
169                        .collect(),
170                    context_after: lines[(index + 1)..after_end]
171                        .iter()
172                        .map(|l| l.to_string())
173                        .collect(),
174                });
175            }
176        }
177        file_matches
178    };
179
180    let mut results: Vec<PatternMatch> = if files.len() >= PARALLEL_FILE_THRESHOLD {
181        use rayon::prelude::*;
182        files.par_iter().flat_map(search_file).collect()
183    } else {
184        // Sequential for small projects — avoids rayon thread-pool overhead
185        let mut seq_results = Vec::new();
186        for path in &files {
187            seq_results.extend(search_file(path));
188            if seq_results.len() >= max_results {
189                break;
190            }
191        }
192        seq_results
193    };
194
195    results.sort_by(|a, b| a.file_path.cmp(&b.file_path).then(a.line.cmp(&b.line)));
196    results.truncate(max_results);
197    Ok(results)
198}
199
200/// Smart search: pattern search enriched with enclosing symbol context.
201/// For each match, finds the nearest enclosing function/class/method via tree-sitter.
202pub fn search_for_pattern_smart(
203    project: &ProjectRoot,
204    pattern: &str,
205    file_glob: Option<&str>,
206    max_results: usize,
207    context_lines_before: usize,
208    context_lines_after: usize,
209) -> Result<Vec<SmartPatternMatch>> {
210    use crate::symbols::get_symbols_overview;
211
212    let base_results = search_for_pattern(
213        project,
214        pattern,
215        file_glob,
216        max_results,
217        context_lines_before,
218        context_lines_after,
219    )?;
220
221    // Group results by file to avoid re-parsing the same file multiple times
222    let mut by_file: std::collections::HashMap<String, Vec<&PatternMatch>> =
223        std::collections::HashMap::new();
224    for result in &base_results {
225        by_file
226            .entry(result.file_path.clone())
227            .or_default()
228            .push(result);
229    }
230
231    // Cache symbols per file
232    let mut symbol_cache: std::collections::HashMap<String, Vec<FlatSymbol>> =
233        std::collections::HashMap::new();
234    for file_path in by_file.keys() {
235        if let Ok(symbols) = get_symbols_overview(project, file_path, 3) {
236            symbol_cache.insert(file_path.clone(), flatten_to_ranges(&symbols));
237        }
238    }
239
240    let smart_results = base_results
241        .into_iter()
242        .map(|m| {
243            let enclosing = symbol_cache
244                .get(&m.file_path)
245                .and_then(|symbols| find_enclosing_symbol(symbols, m.line));
246            SmartPatternMatch {
247                file_path: m.file_path,
248                line: m.line,
249                column: m.column,
250                matched_text: m.matched_text,
251                line_content: m.line_content,
252                context_before: m.context_before,
253                context_after: m.context_after,
254                enclosing_symbol: enclosing,
255            }
256        })
257        .collect();
258
259    Ok(smart_results)
260}