Skip to main content

context_builder/
file_utils.rs

1use ignore::{DirEntry, WalkBuilder, overrides::OverrideBuilder};
2use std::fs;
3use std::io::{self, Write};
4use std::path::{Path, PathBuf};
5
6/// Collects all files to be processed using `ignore` crate for efficient traversal.
7///
8/// `auto_ignores` are runtime-computed exclusion patterns (e.g., the tool's own
9/// output file or cache directory). They are processed identically to user ignores
10/// but kept separate to avoid polluting user-facing configuration.
11pub fn collect_files(
12    base_path: &Path,
13    filters: &[String],
14    ignores: &[String],
15    auto_ignores: &[String],
16) -> io::Result<Vec<DirEntry>> {
17    let mut walker = WalkBuilder::new(base_path);
18    // By default, the "ignore" crate respects .gitignore and hidden files, so we don't need walker.hidden(false)
19
20    // Build overrides for custom ignore patterns
21    let mut override_builder = OverrideBuilder::new(base_path);
22    for pattern in ignores {
23        // Attention: Confusing pattern ahead!
24        // Add the pattern to the override builder with ! prefix to ignore matching files.
25        // In OverrideBuilder, patterns without ! are whitelist (include) patterns,
26        // while patterns with ! are ignore patterns.
27        let ignore_pattern = format!("!{}", pattern);
28        if let Err(e) = override_builder.add(&ignore_pattern) {
29            return Err(io::Error::new(
30                io::ErrorKind::InvalidInput,
31                format!("Invalid ignore pattern '{}': {}", pattern, e),
32            ));
33        }
34    }
35    // Apply auto-computed ignore patterns (output file, cache dir, etc.)
36    for pattern in auto_ignores {
37        let ignore_pattern = format!("!{}", pattern);
38        if let Err(e) = override_builder.add(&ignore_pattern) {
39            log::warn!("Skipping invalid auto-ignore pattern '{}': {}", pattern, e);
40        }
41    }
42    // Also, always ignore the config file itself
43    if let Err(e) = override_builder.add("!context-builder.toml") {
44        return Err(io::Error::new(
45            io::ErrorKind::InvalidInput,
46            format!("Failed to add config ignore: {}", e),
47        ));
48    }
49
50    let overrides = override_builder.build().map_err(|e| {
51        io::Error::new(
52            io::ErrorKind::InvalidInput,
53            format!("Failed to build overrides: {}", e),
54        )
55    })?;
56    walker.overrides(overrides);
57
58    if !filters.is_empty() {
59        let mut type_builder = ignore::types::TypesBuilder::new();
60        type_builder.add_defaults();
61        for filter in filters {
62            let _ = type_builder.add(filter, &format!("*.{}", filter));
63            type_builder.select(filter);
64        }
65        let types = type_builder.build().unwrap();
66        walker.types(types);
67    }
68
69    let mut files: Vec<DirEntry> = walker
70        .build()
71        .filter_map(Result::ok)
72        .filter(|e| e.file_type().is_some_and(|ft| ft.is_file()))
73        .collect();
74
75    // FIX: Sort files deterministically by path to ensure consistent output order
76    files.sort_by(|a, b| a.path().cmp(b.path()));
77
78    Ok(files)
79}
80
/// Asks the user to confirm before processing a large number of files.
///
/// Counts of 100 or fewer are approved immediately without any prompt. For
/// larger counts a warning is printed to stdout and one line is read from
/// stdin; only an answer of `y` (case-insensitive, surrounding whitespace
/// ignored) counts as confirmation.
///
/// # Errors
///
/// Returns any error raised while flushing stdout or reading stdin.
pub fn confirm_processing(file_count: usize) -> io::Result<bool> {
    // Small batches never need confirmation.
    if file_count <= 100 {
        return Ok(true);
    }

    print!(
        "Warning: You're about to process {} files. This might take a while. Continue? [y/N] ",
        file_count
    );
    // The prompt has no trailing newline, so flush to make it visible.
    io::stdout().flush()?;

    let mut answer = String::new();
    io::stdin().read_line(&mut answer)?;

    Ok(answer.trim().eq_ignore_ascii_case("y"))
}
97
/// Asks the user whether an already existing file may be overwritten.
///
/// Prints a prompt naming `file_path` to stdout and reads one line from stdin.
/// Returns `Ok(true)` only when the trimmed answer is `y` (case-insensitive);
/// any other answer, including an empty one, yields `Ok(false)`.
///
/// # Errors
///
/// Returns any error raised while flushing stdout or reading stdin.
pub fn confirm_overwrite(file_path: &str) -> io::Result<bool> {
    print!("The file '{}' already exists. Overwrite? [y/N] ", file_path);
    // The prompt has no trailing newline, so flush to make it visible.
    io::stdout().flush()?;

    let mut reply = String::new();
    io::stdin().read_line(&mut reply)?;

    let confirmed = reply.trim().eq_ignore_ascii_case("y");
    Ok(confirmed)
}
111
/// Returns the most recently modified regular file directly inside `dir`.
///
/// Only the immediate children of `dir` are considered; sub-directories are
/// skipped and not descended into. Returns `Ok(None)` when `dir` is not a
/// directory or contains no regular files. When several files share the newest
/// modification time, the one yielded first by the directory listing wins.
///
/// # Errors
///
/// Returns an error if the directory cannot be listed, if an entry cannot be
/// read, or if the platform cannot report a file's modification time.
pub fn find_latest_file(dir: &Path) -> io::Result<Option<PathBuf>> {
    if !dir.is_dir() {
        return Ok(None);
    }

    // Track the best candidate as (mtime, path). Starting from `None` rather
    // than a sentinel timestamp means files whose mtime is at or before the
    // Unix epoch can still be selected.
    let mut latest: Option<(std::time::SystemTime, PathBuf)> = None;

    for entry in fs::read_dir(dir)? {
        let path = entry?.path();

        // One metadata lookup serves both the file-type check and the mtime.
        // Entries whose metadata cannot be read (e.g. dangling symlinks) are
        // skipped, matching what a `Path::is_file` check would do.
        let Ok(metadata) = fs::metadata(&path) else {
            continue;
        };
        if !metadata.is_file() {
            continue;
        }

        let modified = metadata.modified()?;
        let is_newer = latest
            .as_ref()
            .map_or(true, |(best_time, _)| modified > *best_time);
        if is_newer {
            latest = Some((modified, path));
        }
    }

    Ok(latest.map(|(_, path)| path))
}
135
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::Path;
    use tempfile::tempdir;

    /// Converts walker entries into sorted, `/`-separated paths relative to `base`.
    fn to_rel_paths(mut entries: Vec<DirEntry>, base: &Path) -> Vec<String> {
        entries.sort_by_key(|e| e.path().to_path_buf());
        entries
            .iter()
            .map(|e| {
                e.path()
                    .strip_prefix(base)
                    .unwrap()
                    .to_string_lossy()
                    .replace('\\', "/")
            })
            .collect()
    }

    #[test]
    fn collect_files_respects_filters() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        // create files
        fs::create_dir_all(base.join("src")).unwrap();
        fs::create_dir_all(base.join("scripts")).unwrap();
        fs::write(base.join("src").join("main.rs"), "fn main() {}").unwrap();
        fs::write(base.join("Cargo.toml"), "[package]\nname=\"x\"").unwrap();
        fs::write(base.join("README.md"), "# readme").unwrap();
        fs::write(base.join("scripts").join("build.sh"), "#!/bin/sh\n").unwrap();

        let filters = vec!["rs".to_string(), "toml".to_string()];
        let ignores: Vec<String> = vec![];

        let files = collect_files(base, &filters, &ignores, &[]).unwrap();
        let relative_paths = to_rel_paths(files, base);

        assert!(relative_paths.contains(&"src/main.rs".to_string()));
        assert!(relative_paths.contains(&"Cargo.toml".to_string()));
        assert!(!relative_paths.contains(&"README.md".to_string()));
        assert!(!relative_paths.contains(&"scripts/build.sh".to_string()));
    }

    #[test]
    fn collect_files_respects_ignores_for_dirs_and_files() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        fs::create_dir_all(base.join("src")).unwrap();
        fs::create_dir_all(base.join("target")).unwrap();
        fs::create_dir_all(base.join("node_modules")).unwrap();

        fs::write(base.join("src").join("main.rs"), "fn main() {}").unwrap();
        fs::write(base.join("target").join("artifact.txt"), "bin").unwrap();
        fs::write(base.join("node_modules").join("pkg.js"), "console.log();").unwrap();
        fs::write(base.join("README.md"), "# readme").unwrap();

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec!["target".into(), "node_modules".into(), "README.md".into()];

        let files = collect_files(base, &filters, &ignores, &[]).unwrap();
        let relative_paths = to_rel_paths(files, base);

        assert!(relative_paths.contains(&"src/main.rs".to_string()));
        assert!(!relative_paths.contains(&"target/artifact.txt".to_string()));
        assert!(!relative_paths.contains(&"node_modules/pkg.js".to_string()));
        assert!(!relative_paths.contains(&"README.md".to_string()));
    }

    #[test]
    fn collect_files_applies_auto_ignores() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        fs::write(base.join("output.md"), "generated").unwrap();
        fs::write(base.join("notes.md"), "kept").unwrap();

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec![];
        let auto_ignores: Vec<String> = vec!["output.md".into()];

        let files = collect_files(base, &filters, &ignores, &auto_ignores).unwrap();
        let relative_paths = to_rel_paths(files, base);

        assert!(!relative_paths.contains(&"output.md".to_string()));
        assert!(relative_paths.contains(&"notes.md".to_string()));
    }

    #[test]
    fn collect_files_skips_invalid_auto_ignore_pattern() {
        // Unlike user ignores, a malformed auto-ignore is only logged, so the
        // walk must still succeed.
        let dir = tempdir().unwrap();
        let base = dir.path();

        fs::write(base.join("notes.md"), "kept").unwrap();

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec![];
        let auto_ignores: Vec<String> = vec!["[".into()];

        let files = collect_files(base, &filters, &ignores, &auto_ignores).unwrap();
        let relative_paths = to_rel_paths(files, base);

        assert!(relative_paths.contains(&"notes.md".to_string()));
    }

    #[test]
    fn collect_files_handles_invalid_ignore_pattern() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        fs::create_dir_all(base.join("src")).unwrap();
        fs::write(base.join("src").join("main.rs"), "fn main() {}").unwrap();

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec!["[".into()]; // Unclosed character class: invalid glob

        let result = collect_files(base, &filters, &ignores, &[]);
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("Invalid ignore pattern")
        );
    }

    #[test]
    fn collect_files_rejects_unclosed_bracket_pattern() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec!["[invalid".into()]; // Incomplete bracket

        let result = collect_files(base, &filters, &ignores, &[]);
        assert!(result.is_err());
    }

    #[test]
    fn collect_files_empty_directory() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec![];

        let files = collect_files(base, &filters, &ignores, &[]).unwrap();
        assert!(files.is_empty());
    }

    #[test]
    fn collect_files_no_matching_filters() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        fs::write(base.join("README.md"), "# readme").unwrap();
        fs::write(base.join("script.py"), "print('hello')").unwrap();

        let filters = vec!["rs".to_string()]; // Only Rust files
        let ignores: Vec<String> = vec![];

        let files = collect_files(base, &filters, &ignores, &[]).unwrap();
        assert!(files.is_empty());
    }

    #[test]
    fn collect_files_ignores_config_file() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        fs::write(base.join("context-builder.toml"), "[config]").unwrap();
        fs::write(base.join("other.toml"), "[other]").unwrap();

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec![];

        let files = collect_files(base, &filters, &ignores, &[]).unwrap();
        let relative_paths = to_rel_paths(files, base);

        assert!(!relative_paths.contains(&"context-builder.toml".to_string()));
        assert!(relative_paths.contains(&"other.toml".to_string()));
    }

    #[test]
    fn collect_files_returns_paths_in_sorted_order() {
        let dir = tempdir().unwrap();
        let base = dir.path();

        fs::create_dir_all(base.join("sub")).unwrap();
        fs::write(base.join("b.txt"), "b").unwrap();
        fs::write(base.join("a.txt"), "a").unwrap();
        fs::write(base.join("sub").join("c.txt"), "c").unwrap();

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec![];

        let files = collect_files(base, &filters, &ignores, &[]).unwrap();
        let paths: Vec<PathBuf> = files.iter().map(|e| e.path().to_path_buf()).collect();

        let mut sorted = paths.clone();
        sorted.sort();

        assert_eq!(paths.len(), 3);
        assert_eq!(paths, sorted);
    }

    #[test]
    fn collect_files_with_symlinks() {
        // Behavior with symbolic links (where the platform supports them).
        let dir = tempdir().unwrap();
        let base = dir.path();

        // Create a regular file
        fs::write(base.join("regular.txt"), "content").unwrap();

        // On Unix-like systems, try creating a symlink; failure is tolerated.
        #[cfg(unix)]
        {
            use std::os::unix::fs::symlink;
            let _ = symlink("regular.txt", base.join("link.txt"));
        }

        // On Windows, symlinks require special privileges, so just create
        // another regular file instead.
        #[cfg(windows)]
        {
            fs::write(base.join("another.txt"), "content2").unwrap();
        }

        let filters: Vec<String> = vec![];
        let ignores: Vec<String> = vec![];

        let files = collect_files(base, &filters, &ignores, &[]).unwrap();
        let relative_paths = to_rel_paths(files, base);

        // The regular file must be reported regardless of how the link is handled.
        assert!(relative_paths.contains(&"regular.txt".to_string()));
    }

    #[test]
    fn confirm_processing_small_count() {
        // Small file counts don't require confirmation.
        let result = confirm_processing(50);
        assert!(result.is_ok());
        assert!(result.unwrap());
    }

    #[test]
    fn confirm_processing_does_not_prompt_at_threshold() {
        // The prompt only triggers for counts strictly greater than 100, so
        // these calls return without touching stdin. The interactive branch
        // is not covered here because stdin cannot be substituted.
        assert!(confirm_processing(0).unwrap());
        assert!(confirm_processing(100).unwrap());
    }

    #[test]
    fn confirm_overwrite_has_expected_signature() {
        // `confirm_overwrite` always prompts, so only its signature is pinned
        // here; the coercion below fails to compile if it ever changes.
        let _: fn(&str) -> std::io::Result<bool> = confirm_overwrite;
    }

    #[test]
    fn find_latest_file_empty_directory() {
        let dir = tempdir().unwrap();
        let result = find_latest_file(dir.path()).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn find_latest_file_nonexistent_directory() {
        let dir = tempdir().unwrap();
        let nonexistent = dir.path().join("nonexistent");
        let result = find_latest_file(&nonexistent).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn find_latest_file_missing_absolute_path() {
        // A path that does not exist at all yields Ok(None), not an error.
        let nonexistent = Path::new("/this/path/should/not/exist/anywhere");
        let result = find_latest_file(nonexistent);

        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
    }

    #[test]
    fn find_latest_file_single_file() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test.txt");
        fs::write(&file_path, "content").unwrap();

        let result = find_latest_file(dir.path()).unwrap();
        assert!(result.is_some());
        assert_eq!(result.unwrap(), file_path);
    }

    #[test]
    fn find_latest_file_multiple_files() {
        let dir = tempdir().unwrap();

        let file1 = dir.path().join("old.txt");
        let file2 = dir.path().join("new.txt");

        fs::write(&file1, "old content").unwrap();
        // NOTE(review): assumes the filesystem records modification times with
        // better than 10 ms resolution; coarser filesystems could make the two
        // timestamps equal.
        std::thread::sleep(std::time::Duration::from_millis(10));
        fs::write(&file2, "new content").unwrap();

        let result = find_latest_file(dir.path()).unwrap();
        assert!(result.is_some());
        assert_eq!(result.unwrap(), file2);
    }

    #[test]
    fn find_latest_file_ignores_directories() {
        let dir = tempdir().unwrap();
        let subdir = dir.path().join("subdir");
        fs::create_dir(&subdir).unwrap();

        let file_path = dir.path().join("test.txt");
        fs::write(&file_path, "content").unwrap();

        let result = find_latest_file(dir.path()).unwrap();
        assert!(result.is_some());
        assert_eq!(result.unwrap(), file_path);
    }
}