Skip to main content

perl_workspace_discovery/
lib.rs

1//! Git-aware Perl workspace file discovery.
2//!
3//! This crate finds Perl source files in a workspace root with a two-step strategy:
4//! 1. Try `git ls-files` for fast, `.gitignore`-aware enumeration.
5//! 2. Fall back to filesystem walking with `WalkDir` when git is unavailable.
6//!
7//! The resulting behavior is intentionally conservative: common non-source directories
8//! are skipped in both modes (`.git`, `.hg`, `.svn`, `target`, `node_modules`, `.cache`).
9
10// Lint enforcement: library code must use tracing, not direct stderr/stdout prints.
11#![deny(clippy::print_stderr, clippy::print_stdout)]
12#![cfg_attr(test, allow(clippy::print_stderr, clippy::print_stdout))]
13
14use perl_source_file::is_perl_source_path;
15use perl_workspace_ignore::{is_skipped_dir_name, path_contains_skipped_component};
16use std::path::{Path, PathBuf};
17use std::time::{Duration, Instant};
18use walkdir::{DirEntry, WalkDir};
19
/// Arguments handed to `git` for file enumeration: list indexed (`--cached`) and
/// untracked (`--others`) paths, apply the standard ignore rules
/// (`--exclude-standard`), and separate entries with NUL bytes (`-z`).
const GIT_LS_FILES_ARGS: [&str; 5] = [
    "ls-files",
    "-z",
    "--cached",
    "--others",
    "--exclude-standard",
];
22
/// Strategy that produced a discovery result.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DiscoveryMethod {
    /// The file list came from `git ls-files`.
    Git,
    /// The file list came from a `WalkDir` filesystem traversal.
    Walk,
}
31
32/// File discovery result metadata.
33#[derive(Debug, Clone)]
34pub struct DiscoveryResult {
35    /// Discovered Perl source files.
36    pub files: Vec<PathBuf>,
37    /// Discovery method used.
38    pub method: DiscoveryMethod,
39    /// Elapsed discovery duration.
40    pub duration: Duration,
41    /// Number of entries excluded by extension/skip rules.
42    pub excluded_count: usize,
43}
44
45/// Discover Perl source files under `root`.
46///
47/// Strategy:
48/// 1. Attempt `git ls-files -z --cached --others --exclude-standard`
49/// 2. If git is unavailable or the root is not a repository, use `WalkDir`
50#[must_use]
51pub fn discover_perl_files(root: &Path) -> DiscoveryResult {
52    let start = Instant::now();
53
54    match try_git_discovery(root, start) {
55        Ok(result) => result,
56        Err(_) => walk_discovery(root, start),
57    }
58}
59
60fn try_git_discovery(root: &Path, start: Instant) -> Result<DiscoveryResult, std::io::Error> {
61    let output = std::process::Command::new("git")
62        .args(GIT_LS_FILES_ARGS)
63        .current_dir(root)
64        .stdout(std::process::Stdio::piped())
65        .stderr(std::process::Stdio::null())
66        .output()?;
67
68    if !output.status.success() {
69        return Err(std::io::Error::other("git ls-files failed"));
70    }
71
72    let (files, excluded_count) = parse_git_ls_files_output(root, &output.stdout);
73    let result = DiscoveryResult {
74        files,
75        method: DiscoveryMethod::Git,
76        duration: start.elapsed(),
77        excluded_count,
78    };
79
80    log_discovery(&result);
81    Ok(result)
82}
83
84fn parse_git_ls_files_output(root: &Path, stdout: &[u8]) -> (Vec<PathBuf>, usize) {
85    let stdout = String::from_utf8_lossy(stdout);
86    let mut files = Vec::new();
87    let mut excluded_count: usize = 0;
88
89    for entry in stdout.split('\0') {
90        if entry.is_empty() {
91            continue;
92        }
93
94        let relative_path = Path::new(entry);
95        if path_contains_skipped_component(relative_path) {
96            excluded_count += 1;
97            continue;
98        }
99
100        let path = root.join(relative_path);
101        if is_perl_source_path(&path) {
102            files.push(path);
103        } else {
104            excluded_count += 1;
105        }
106    }
107
108    (files, excluded_count)
109}
110
111fn walk_discovery(root: &Path, start: Instant) -> DiscoveryResult {
112    let mut files = Vec::new();
113    let mut excluded_count: usize = 0;
114
115    for entry in WalkDir::new(root)
116        .follow_links(false)
117        .into_iter()
118        .filter_entry(|entry| !should_skip_dir(entry))
119    {
120        let entry = match entry {
121            Ok(entry) => entry,
122            Err(_) => continue,
123        };
124
125        if !entry.file_type().is_file() {
126            continue;
127        }
128
129        if is_perl_source_path(entry.path()) {
130            files.push(entry.path().to_path_buf());
131        } else {
132            excluded_count += 1;
133        }
134    }
135
136    let result = DiscoveryResult {
137        files,
138        method: DiscoveryMethod::Walk,
139        duration: start.elapsed(),
140        excluded_count,
141    };
142
143    log_discovery(&result);
144    result
145}
146
147fn should_skip_dir(entry: &DirEntry) -> bool {
148    if !entry.file_type().is_dir() {
149        return false;
150    }
151
152    is_skipped_dir_name(&entry.file_name().to_string_lossy())
153}
154
155fn log_discovery(result: &DiscoveryResult) {
156    tracing::debug!(
157        files = result.files.len(),
158        method = ?result.method,
159        duration_ms = result.duration.as_secs_f64() * 1000.0,
160        excluded = result.excluded_count,
161        "workspace discovery complete"
162    );
163}
164
#[cfg(test)]
mod tests {
    use super::{
        DiscoveryMethod, parse_git_ls_files_output, path_contains_skipped_component,
        should_skip_dir, walk_discovery,
    };
    use std::fs;
    use std::path::Path;
    use std::time::Instant;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Directory names the skip rules are expected to reject.
    const NOISE_DIRS: [&str; 6] = [".git", ".hg", ".svn", "target", "node_modules", ".cache"];

    /// Write a placeholder file at `root/relative`, creating parent directories.
    fn create_file(root: &Path, relative: &str) -> TestResult {
        let destination = root.join(relative);
        if let Some(directory) = destination.parent() {
            fs::create_dir_all(directory)?;
        }
        fs::write(destination, "# synthetic\n")?;
        Ok(())
    }

    /// Create every file in `relatives` below `root`.
    fn create_files(root: &Path, relatives: &[&str]) -> TestResult {
        for relative in relatives {
            create_file(root, relative)?;
        }
        Ok(())
    }

    /// Run the git-output parser against the conventional `/tmp/workspace` root.
    fn parse_in_workspace(payload: &[u8]) -> (Vec<std::path::PathBuf>, usize) {
        parse_git_ls_files_output(Path::new("/tmp/workspace"), payload)
    }

    /// Entries found at depth <= 1 below `root`, with `root` itself removed.
    fn top_level_entries(root: &Path) -> Vec<walkdir::DirEntry> {
        walkdir::WalkDir::new(root)
            .max_depth(1)
            .into_iter()
            .flatten()
            .filter(|entry| entry.path() != root)
            .collect()
    }

    #[test]
    fn parses_git_output_and_filters_entries() {
        let (files, excluded_count) =
            parse_in_workspace(b"lib/Foo.pm\0README.md\0node_modules/pkg.pm\0script.pl\0");

        assert_eq!(files.len(), 2);
        for expected in ["lib/Foo.pm", "script.pl"] {
            assert!(files.iter().any(|path| path.ends_with(expected)));
        }
        assert_eq!(excluded_count, 2);
    }

    #[test]
    fn skipped_component_detection_is_consistent() {
        for noisy in ["/repo/node_modules/pkg.pm", "/repo/target/build/generated.pm"] {
            assert!(path_contains_skipped_component(Path::new(noisy)));
        }
        assert!(!path_contains_skipped_component(Path::new("/repo/lib/My/Module.pm")));
    }

    #[test]
    fn parse_git_output_ignores_skipped_names_in_workspace_root_path() {
        // The root contains `target`, which must not cause the entry to be skipped.
        let (files, excluded_count) =
            parse_git_ls_files_output(Path::new("/tmp/target/workspace"), b"lib/Foo.pm\0");

        assert_eq!(files.len(), 1);
        assert!(files[0].ends_with("lib/Foo.pm"));
        assert_eq!(excluded_count, 0);
    }

    #[test]
    fn walk_discovery_ignores_skipped_directories() -> TestResult {
        let workspace = tempfile::tempdir()?;
        create_files(
            workspace.path(),
            &[
                "lib/Foo.pm",
                "node_modules/pkg.pm",
                "target/build/generated.pm",
                ".cache/precompiled.pm",
            ],
        )?;

        let outcome = walk_discovery(workspace.path(), Instant::now());
        assert_eq!(outcome.method, DiscoveryMethod::Walk);
        assert_eq!(outcome.files.len(), 1);
        assert!(outcome.files[0].ends_with("lib/Foo.pm"));

        Ok(())
    }

    #[test]
    fn should_skip_dir_matches_conventional_noise_directories() -> TestResult {
        let workspace = tempfile::tempdir()?;
        let root = workspace.path();
        for directory in [".git", "node_modules", "src"] {
            fs::create_dir_all(root.join(directory))?;
        }

        let entries = top_level_entries(root);
        for (name, expect_skipped) in [(".git", true), ("node_modules", true), ("src", false)] {
            // Every directory created above must show up in the listing.
            let entry = entries
                .iter()
                .find(|entry| entry.file_name().to_string_lossy() == name)
                .ok_or_else(|| format!("directory {name} was not listed"))?;
            assert_eq!(should_skip_dir(entry), expect_skipped);
        }

        Ok(())
    }

    // --- Additional coverage: parse_git_ls_files_output edge cases ---

    #[test]
    fn parse_git_output_empty_input_returns_nothing() {
        let (files, excluded_count) = parse_in_workspace(b"");
        assert!(files.is_empty());
        assert_eq!(excluded_count, 0);
    }

    #[test]
    fn parse_git_output_only_null_separators() {
        let (files, excluded_count) = parse_in_workspace(b"\0\0\0");
        assert!(files.is_empty());
        assert_eq!(excluded_count, 0);
    }

    #[test]
    fn parse_git_output_recognizes_all_perl_extensions() {
        let (files, excluded_count) =
            parse_in_workspace(b"lib/Foo.pm\0scripts/run.pl\0t/basic.t\0app/main.psgi\0");

        assert_eq!(files.len(), 4);
        for file_name in ["Foo.pm", "run.pl", "basic.t", "main.psgi"] {
            assert!(files.iter().any(|path| path.ends_with(file_name)));
        }
        assert_eq!(excluded_count, 0);
    }

    #[test]
    fn parse_git_output_counts_non_perl_as_excluded() {
        let (files, excluded_count) = parse_in_workspace(b"README.md\0Makefile\0config.yaml\0");

        assert!(files.is_empty());
        assert_eq!(excluded_count, 3);
    }

    #[test]
    fn parse_git_output_excludes_all_skipped_directories() {
        let (files, excluded_count) = parse_in_workspace(
            b".git/hooks/pre-commit.pl\0.hg/config.pm\0.svn/entries.pm\0target/out.pm\0node_modules/dep.pm\0.cache/fast.pm\0",
        );

        assert!(files.is_empty());
        assert_eq!(excluded_count, 6);
    }

    #[test]
    fn parse_git_output_joins_root_to_relative_paths() {
        let (files, _) =
            parse_git_ls_files_output(Path::new("/home/user/project"), b"lib/Module.pm\0");

        assert_eq!(files.len(), 1);
        assert_eq!(files[0], Path::new("/home/user/project/lib/Module.pm"));
    }

    // --- Additional coverage: path_contains_skipped_component ---

    #[test]
    fn skipped_component_detects_each_directory_individually() {
        for dir in NOISE_DIRS {
            let nested = format!("lib/{dir}/nested.pm");
            assert!(
                path_contains_skipped_component(Path::new(&nested)),
                "expected {dir} to be skipped"
            );
        }
    }

    #[test]
    fn skipped_component_allows_safe_directories() {
        for dir in ["lib", "src", "bin", "t", "scripts", "blib"] {
            let module = format!("{dir}/Module.pm");
            assert!(
                !path_contains_skipped_component(Path::new(&module)),
                "expected {dir} to be allowed"
            );
        }
    }

    #[test]
    fn skipped_component_empty_path_returns_false() {
        assert!(!path_contains_skipped_component(Path::new("")));
    }

    #[test]
    fn skipped_component_single_filename_returns_false() {
        assert!(!path_contains_skipped_component(Path::new("Module.pm")));
    }

    #[test]
    fn skipped_component_deeply_nested() {
        assert!(path_contains_skipped_component(Path::new("a/b/c/node_modules/d/e/f.pm")));
    }

    // --- Additional coverage: walk_discovery edge cases ---

    #[test]
    fn walk_discovery_empty_directory() -> TestResult {
        let workspace = tempfile::tempdir()?;
        let outcome = walk_discovery(workspace.path(), Instant::now());

        assert_eq!(outcome.method, DiscoveryMethod::Walk);
        assert!(outcome.files.is_empty());
        assert_eq!(outcome.excluded_count, 0);

        Ok(())
    }

    #[test]
    fn walk_discovery_only_non_perl_files() -> TestResult {
        let workspace = tempfile::tempdir()?;
        create_files(workspace.path(), &["README.md", "Makefile", "config.yaml"])?;

        let outcome = walk_discovery(workspace.path(), Instant::now());
        assert_eq!(outcome.method, DiscoveryMethod::Walk);
        assert!(outcome.files.is_empty());
        assert_eq!(outcome.excluded_count, 3);

        Ok(())
    }

    #[test]
    fn walk_discovery_finds_all_perl_extensions() -> TestResult {
        let workspace = tempfile::tempdir()?;
        create_files(
            workspace.path(),
            &["lib/Foo.pm", "bin/run.pl", "t/basic.t", "app/main.psgi"],
        )?;

        let outcome = walk_discovery(workspace.path(), Instant::now());
        assert_eq!(outcome.files.len(), 4);

        Ok(())
    }

    #[test]
    fn walk_discovery_deeply_nested_perl_files() -> TestResult {
        let workspace = tempfile::tempdir()?;
        create_files(workspace.path(), &["a/b/c/d/e/Deep.pm", "x/y/z/script.pl"])?;

        let outcome = walk_discovery(workspace.path(), Instant::now());
        assert_eq!(outcome.files.len(), 2);
        for file_name in ["Deep.pm", "script.pl"] {
            assert!(outcome.files.iter().any(|path| path.ends_with(file_name)));
        }

        Ok(())
    }

    #[test]
    fn walk_discovery_skips_all_six_noise_directories() -> TestResult {
        let workspace = tempfile::tempdir()?;
        create_files(
            workspace.path(),
            &[
                ".git/hooks/hook.pm",
                ".hg/config.pm",
                ".svn/entries.pm",
                "target/build/out.pm",
                "node_modules/dep.pm",
                ".cache/fast.pm",
                "lib/Visible.pm",
            ],
        )?;

        let outcome = walk_discovery(workspace.path(), Instant::now());
        assert_eq!(outcome.files.len(), 1);
        assert!(outcome.files[0].ends_with("lib/Visible.pm"));

        Ok(())
    }

    #[test]
    fn walk_discovery_records_duration() -> TestResult {
        let workspace = tempfile::tempdir()?;
        let outcome = walk_discovery(workspace.path(), Instant::now());
        // Only checks that the duration can be read without panicking.
        let _ = outcome.duration.as_nanos();

        Ok(())
    }

    #[test]
    fn walk_discovery_ignores_subdirectories_themselves() -> TestResult {
        let workspace = tempfile::tempdir()?;
        let root = workspace.path();

        // A directory whose name carries a Perl extension must not be reported.
        fs::create_dir_all(root.join("lib/Fake.pm/nested"))?;
        create_file(root, "lib/Real.pm")?;

        let outcome = walk_discovery(root, Instant::now());
        assert_eq!(outcome.files.len(), 1);
        assert!(outcome.files[0].ends_with("lib/Real.pm"));

        Ok(())
    }

    // --- Additional coverage: should_skip_dir for non-directory entries ---

    #[test]
    fn should_skip_dir_returns_false_for_files() -> TestResult {
        let workspace = tempfile::tempdir()?;
        let root = workspace.path();

        // A regular file, even one whose name starts with a skipped name.
        fs::write(root.join("target.txt"), "data")?;

        let listed_files = top_level_entries(root)
            .into_iter()
            .filter(|entry| entry.file_type().is_file());
        for file_entry in listed_files {
            assert!(!should_skip_dir(&file_entry));
        }

        Ok(())
    }

    #[test]
    fn should_skip_dir_covers_all_six_directories() -> TestResult {
        let workspace = tempfile::tempdir()?;
        let root = workspace.path();
        for directory in NOISE_DIRS {
            fs::create_dir_all(root.join(directory))?;
        }

        let mut matched = 0usize;
        for entry in top_level_entries(root) {
            let name = entry.file_name().to_string_lossy();
            let is_noise_dir =
                entry.file_type().is_dir() && NOISE_DIRS.iter().any(|dir| name == *dir);
            if is_noise_dir {
                assert!(should_skip_dir(&entry), "expected {name} to be skipped");
                matched += 1;
            }
        }

        assert_eq!(matched, NOISE_DIRS.len());
        Ok(())
    }

    // --- Additional coverage: DiscoveryMethod traits ---

    #[test]
    fn discovery_method_debug_and_equality() {
        let (git, walk, git_again) =
            (DiscoveryMethod::Git, DiscoveryMethod::Walk, DiscoveryMethod::Git);

        assert_eq!(git, git_again);
        assert_ne!(git, walk);
        // Formatting with Debug must not panic.
        let _ = format!("{git:?}");
        let _ = format!("{walk:?}");
    }

    #[test]
    fn discovery_method_clone_and_copy() {
        let original = DiscoveryMethod::Git;
        let (cloned, copied) = (original, original);

        assert_eq!(original, cloned);
        assert_eq!(original, copied);
    }

    // --- Additional coverage: DiscoveryResult ---

    #[test]
    fn discovery_result_clone_and_debug() -> TestResult {
        let workspace = tempfile::tempdir()?;
        create_file(workspace.path(), "lib/Foo.pm")?;

        let outcome = walk_discovery(workspace.path(), Instant::now());
        let duplicate = outcome.clone();

        assert_eq!(duplicate.files.len(), outcome.files.len());
        assert_eq!(duplicate.method, outcome.method);
        assert_eq!(duplicate.excluded_count, outcome.excluded_count);
        // Formatting with Debug must not panic.
        let _ = format!("{outcome:?}");

        Ok(())
    }

    // --- Additional coverage: mixed Perl and non-Perl content ---

    #[test]
    fn walk_discovery_mixed_content_accurate_counts() -> TestResult {
        let workspace = tempfile::tempdir()?;
        let root = workspace.path();

        // Three Perl files followed by two non-Perl files.
        create_files(root, &["lib/A.pm", "bin/b.pl", "t/c.t"])?;
        create_files(root, &["README.md", "Makefile"])?;

        let outcome = walk_discovery(root, Instant::now());
        assert_eq!(outcome.files.len(), 3);
        assert_eq!(outcome.excluded_count, 2);

        Ok(())
    }

    #[test]
    fn parse_git_output_mixed_content_accurate_counts() {
        let (files, excluded_count) = parse_in_workspace(
            b"lib/A.pm\0bin/b.pl\0t/c.t\0app/d.psgi\0README.md\0Makefile\0node_modules/e.pm\0",
        );

        assert_eq!(files.len(), 4);
        // README.md + Makefile (non-perl) + node_modules/e.pm (skipped dir)
        assert_eq!(excluded_count, 3);
    }
}