Skip to main content

limit_cli/
file_finder.rs

1use frizbee::Config as FrizbeeConfig;
2use ignore::WalkBuilder;
3use std::path::{Path, PathBuf};
4use std::time::{Duration, Instant};
5
/// One entry of a file-search result list.
///
/// Produced by `FileFinder::filter_files`; pairs a path with the score the
/// fuzzy matcher assigned to it.
#[derive(Clone, Debug)]
pub struct FileMatch {
    /// Path of the entry, relative to the finder's working directory.
    pub path: PathBuf,
    /// `true` when the entry is a directory rather than a regular file.
    pub is_dir: bool,
    /// Fuzzy-match score; larger values mean a closer match.
    pub score: i64,
}
16
/// Fuzzy file finder backed by a short-lived cache of directory listings.
///
/// Directory walking is delegated to the `ignore` crate (the walker used by
/// ripgrep), so ignore files such as `.gitignore` are honoured.
pub struct FileFinder {
    /// Root directory that scans start from; cached paths are relative to it.
    working_dir: PathBuf,
    /// Paths collected by the most recent scan.
    cached_files: Vec<PathBuf>,
    /// When the most recent scan finished; `None` if no scan is cached.
    last_scan: Option<Instant>,
    /// How long a cached scan stays valid before a rescan is required.
    cache_ttl: Duration,
    /// Deepest directory level the walker descends to.
    max_depth: usize,
}
31
32impl FileFinder {
33    /// Create a new FileFinder for the given working directory
34    /// Automatically respects .gitignore, .ignore, and other standard ignore files
35    pub fn new(working_dir: PathBuf) -> Self {
36        Self {
37            working_dir,
38            cached_files: Vec::new(),
39            last_scan: None,
40            cache_ttl: Duration::from_secs(5),
41            max_depth: 10,
42        }
43    }
44
45    /// Scan directory and return all files (relative paths)
46    /// The `ignore` crate automatically handles .gitignore, .ignore, etc.
47    pub fn scan_files(&mut self) -> &Vec<PathBuf> {
48        // Check cache
49        if let Some(last_scan) = self.last_scan {
50            if last_scan.elapsed() < self.cache_ttl {
51                return &self.cached_files;
52            }
53        }
54
55        // Rescan
56        self.cached_files.clear();
57
58        // Use ignore crate's WalkBuilder which respects .gitignore automatically
59        for result in WalkBuilder::new(&self.working_dir)
60            .max_depth(Some(self.max_depth))
61            .hidden(true) // Skip hidden files
62            .git_ignore(true) // Respect .gitignore
63            .git_global(true) // Respect global gitignore
64            .git_exclude(true) // Respect .git/info/exclude
65            .ignore(true) // Respect .ignore files
66            .build()
67        {
68            match result {
69                Ok(entry) => {
70                    let path = entry.path();
71
72                    // Get relative path
73                    if let Ok(rel_path) = path.strip_prefix(&self.working_dir) {
74                        // Skip the root directory itself (empty path)
75                        if rel_path.as_os_str().is_empty() {
76                            continue;
77                        }
78
79                        self.cached_files.push(rel_path.to_path_buf());
80                    }
81                }
82                Err(err) => {
83                    tracing::debug!("Error scanning directory: {}", err);
84                }
85            }
86        }
87
88        // Sort for consistent ordering
89        self.cached_files.sort();
90
91        self.last_scan = Some(Instant::now());
92        &self.cached_files
93    }
94
95    /// Filter files by query using fuzzy matching with frizbee
96    pub fn filter_files(&self, files: &[PathBuf], query: &str) -> Vec<FileMatch> {
97        if query.is_empty() {
98            // Return first 20 files if no query
99            return files
100                .iter()
101                .take(20)
102                .map(|p| FileMatch {
103                    path: p.clone(),
104                    is_dir: p.to_string_lossy().ends_with('/'),
105                    score: 0,
106                })
107                .collect();
108        }
109
110        // Use frizbee for fuzzy matching
111        // Convert paths to strings first (owned)
112        let haystacks: Vec<String> = files
113            .iter()
114            .map(|p| p.to_string_lossy().to_string())
115            .collect();
116
117        // Create a slice of &str for frizbee
118        let haystack_refs: Vec<&str> = haystacks.iter().map(|s| s.as_str()).collect();
119
120        // Configure frizbee for fuzzy matching
121        let config = FrizbeeConfig::default();
122
123        // Match files against query
124        let fuzzy_matches = frizbee::match_list(query, &haystack_refs, &config);
125
126        // Convert frizbee matches to our FileMatch type
127        let mut matches: Vec<FileMatch> = fuzzy_matches
128            .into_iter()
129            .filter_map(|m| {
130                // frizbee returns Match with index, score, and exact
131                if (m.index as usize) < files.len() {
132                    let path = files[m.index as usize].clone();
133                    let path_str = haystacks[m.index as usize].clone();
134
135                    Some(FileMatch {
136                        path,
137                        is_dir: path_str.ends_with('/'),
138                        score: m.score as i64,
139                    })
140                } else {
141                    None
142                }
143            })
144            .collect();
145
146        // Sort by score (descending) - frizbee returns higher scores for better matches
147        matches.sort_by(|a, b| b.score.cmp(&a.score));
148
149        // Limit to 20 results
150        matches.truncate(20);
151        matches
152    }
153
154    /// Get the working directory
155    pub fn working_dir(&self) -> &Path {
156        &self.working_dir
157    }
158
159    /// Force refresh the cache
160    pub fn refresh_cache(&mut self) {
161        self.last_scan = None;
162        self.scan_files();
163    }
164}
165
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a finder rooted at the process's current directory.
    fn finder_in_cwd() -> FileFinder {
        let cwd = std::env::current_dir().unwrap();
        FileFinder::new(cwd)
    }

    /// Whether any scanned path, rendered as a string, begins with `prefix`.
    fn any_with_prefix(paths: &[PathBuf], prefix: &str) -> bool {
        paths
            .iter()
            .any(|p| p.to_string_lossy().starts_with(prefix))
    }

    #[test]
    fn test_file_finder_basic() {
        let mut finder = finder_in_cwd();
        let scanned = finder.scan_files();

        // The crate manifest lives in the directory the tests run from.
        assert!(scanned.iter().any(|p| p.to_string_lossy() == "Cargo.toml"));

        // `.git` is a hidden directory, and the walker skips hidden entries.
        assert!(!any_with_prefix(scanned, ".git/"));

        // Build output is expected to be covered by the ignore rules.
        assert!(!any_with_prefix(scanned, "target/"));
    }

    #[test]
    fn test_filter_files() {
        let mut finder = finder_in_cwd();
        let scanned = finder.scan_files().clone();

        let results = finder.filter_files(&scanned, "Cargo");
        assert!(!results.is_empty());

        let found_manifest = results
            .iter()
            .any(|m| m.path.to_string_lossy() == "Cargo.toml");
        assert!(found_manifest);
    }

    #[test]
    fn test_cache_ttl() {
        let mut finder = finder_in_cwd();
        finder.cache_ttl = std::time::Duration::from_millis(50);

        // Two back-to-back scans: the second one falls inside the TTL window.
        let first = finder.scan_files().clone();
        let second = finder.scan_files().clone();
        assert_eq!(first.len(), second.len());

        // Sleep past the TTL so the next call has to walk the directory again.
        std::thread::sleep(std::time::Duration::from_millis(60));

        let third = finder.scan_files().clone();
        assert!(!third.is_empty());
    }

    #[test]
    fn test_gitignore_respected() {
        let mut finder = finder_in_cwd();
        let scanned = finder.scan_files();

        // No scanned entry may live under a commonly ignored directory.
        for entry in scanned.iter() {
            let path = entry.to_string_lossy().to_string();
            assert!(
                !path.starts_with("target/"),
                "Found target/ in results: {}",
                path
            );
            assert!(
                !path.starts_with(".git/"),
                "Found .git/ in results: {}",
                path
            );
        }
    }
}