1#![deny(clippy::print_stderr, clippy::print_stdout)]
12#![cfg_attr(test, allow(clippy::print_stderr, clippy::print_stdout))]
13
14use perl_source_file::is_perl_source_path;
15use perl_workspace_ignore::{is_skipped_dir_name, path_contains_skipped_component};
16use std::path::{Path, PathBuf};
17use std::time::{Duration, Instant};
18use walkdir::{DirEntry, WalkDir};
19
/// Arguments handed to `git` when listing candidate files.
///
/// `--cached` and `--others` together cover tracked and untracked files,
/// `--exclude-standard` applies the usual ignore rules, and `-z` makes the
/// output NUL-separated, which is what the parser in this file splits on.
const GIT_LS_FILES_ARGS: [&str; 5] = [
    "ls-files",
    "-z",
    "--cached",
    "--others",
    "--exclude-standard",
];
22
/// Strategy that produced a discovery result.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DiscoveryMethod {
    /// The file list came from running `git ls-files`.
    Git,
    /// The file list came from walking the directory tree.
    Walk,
}
31
32#[derive(Debug, Clone)]
34pub struct DiscoveryResult {
35 pub files: Vec<PathBuf>,
37 pub method: DiscoveryMethod,
39 pub duration: Duration,
41 pub excluded_count: usize,
43}
44
45#[must_use]
51pub fn discover_perl_files(root: &Path) -> DiscoveryResult {
52 let start = Instant::now();
53
54 match try_git_discovery(root, start) {
55 Ok(result) => result,
56 Err(_) => walk_discovery(root, start),
57 }
58}
59
60fn try_git_discovery(root: &Path, start: Instant) -> Result<DiscoveryResult, std::io::Error> {
61 let output = std::process::Command::new("git")
62 .args(GIT_LS_FILES_ARGS)
63 .current_dir(root)
64 .stdout(std::process::Stdio::piped())
65 .stderr(std::process::Stdio::null())
66 .output()?;
67
68 if !output.status.success() {
69 return Err(std::io::Error::other("git ls-files failed"));
70 }
71
72 let (files, excluded_count) = parse_git_ls_files_output(root, &output.stdout);
73 let result = DiscoveryResult {
74 files,
75 method: DiscoveryMethod::Git,
76 duration: start.elapsed(),
77 excluded_count,
78 };
79
80 log_discovery(&result);
81 Ok(result)
82}
83
84fn parse_git_ls_files_output(root: &Path, stdout: &[u8]) -> (Vec<PathBuf>, usize) {
85 let stdout = String::from_utf8_lossy(stdout);
86 let mut files = Vec::new();
87 let mut excluded_count: usize = 0;
88
89 for entry in stdout.split('\0') {
90 if entry.is_empty() {
91 continue;
92 }
93
94 let relative_path = Path::new(entry);
95 if path_contains_skipped_component(relative_path) {
96 excluded_count += 1;
97 continue;
98 }
99
100 let path = root.join(relative_path);
101 if is_perl_source_path(&path) {
102 files.push(path);
103 } else {
104 excluded_count += 1;
105 }
106 }
107
108 (files, excluded_count)
109}
110
111fn walk_discovery(root: &Path, start: Instant) -> DiscoveryResult {
112 let mut files = Vec::new();
113 let mut excluded_count: usize = 0;
114
115 for entry in WalkDir::new(root)
116 .follow_links(false)
117 .into_iter()
118 .filter_entry(|entry| !should_skip_dir(entry))
119 {
120 let entry = match entry {
121 Ok(entry) => entry,
122 Err(_) => continue,
123 };
124
125 if !entry.file_type().is_file() {
126 continue;
127 }
128
129 if is_perl_source_path(entry.path()) {
130 files.push(entry.path().to_path_buf());
131 } else {
132 excluded_count += 1;
133 }
134 }
135
136 let result = DiscoveryResult {
137 files,
138 method: DiscoveryMethod::Walk,
139 duration: start.elapsed(),
140 excluded_count,
141 };
142
143 log_discovery(&result);
144 result
145}
146
147fn should_skip_dir(entry: &DirEntry) -> bool {
148 if !entry.file_type().is_dir() {
149 return false;
150 }
151
152 is_skipped_dir_name(&entry.file_name().to_string_lossy())
153}
154
155fn log_discovery(result: &DiscoveryResult) {
156 tracing::debug!(
157 files = result.files.len(),
158 method = ?result.method,
159 duration_ms = result.duration.as_secs_f64() * 1000.0,
160 excluded = result.excluded_count,
161 "workspace discovery complete"
162 );
163}
164
#[cfg(test)]
mod tests {
    use super::{
        DiscoveryMethod, parse_git_ls_files_output, path_contains_skipped_component,
        should_skip_dir, walk_discovery,
    };
    use std::fs;
    use std::path::Path;
    use std::time::Instant;

    /// Lets fixture-building tests propagate errors with `?`.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Synthetic root used by most parser tests.
    const WORKSPACE: &str = "/tmp/workspace";

    /// Writes a small placeholder file at `root/relative`, creating any
    /// missing parent directories first.
    fn create_file(root: &Path, relative: &str) -> TestResult {
        let target = root.join(relative);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir)?;
        }
        fs::write(target, "# synthetic\n")?;
        Ok(())
    }

    /// Creates every file in `relatives` beneath `root`, in order.
    fn create_files(root: &Path, relatives: &[&str]) -> TestResult {
        for relative in relatives {
            create_file(root, relative)?;
        }
        Ok(())
    }

    /// Returns the readable entries directly beneath `root`, without `root` itself.
    fn direct_children(root: &Path) -> Vec<walkdir::DirEntry> {
        walkdir::WalkDir::new(root)
            .max_depth(1)
            .into_iter()
            .flatten()
            .filter(|entry| entry.path() != root)
            .collect()
    }

    #[test]
    fn parses_git_output_and_filters_entries() {
        let listing = b"lib/Foo.pm\0README.md\0node_modules/pkg.pm\0script.pl\0";

        let (perl_files, skipped) = parse_git_ls_files_output(Path::new(WORKSPACE), listing);

        assert_eq!(perl_files.len(), 2);
        for expected in ["lib/Foo.pm", "script.pl"] {
            assert!(perl_files.iter().any(|found| found.ends_with(expected)));
        }
        assert_eq!(skipped, 2);
    }

    #[test]
    fn skipped_component_detection_is_consistent() {
        let cases = [
            ("/repo/node_modules/pkg.pm", true),
            ("/repo/target/build/generated.pm", true),
            ("/repo/lib/My/Module.pm", false),
        ];

        for (raw, expected) in cases {
            assert_eq!(path_contains_skipped_component(Path::new(raw)), expected);
        }
    }

    #[test]
    fn parse_git_output_ignores_skipped_names_in_workspace_root_path() {
        // `target` appears in the root only, never in the listed entry.
        let nested_root = Path::new("/tmp/target/workspace");

        let (perl_files, skipped) = parse_git_ls_files_output(nested_root, b"lib/Foo.pm\0");

        assert_eq!(perl_files.len(), 1);
        assert!(perl_files[0].ends_with("lib/Foo.pm"));
        assert_eq!(skipped, 0);
    }

    #[test]
    fn walk_discovery_ignores_skipped_directories() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        create_files(
            root,
            &[
                "lib/Foo.pm",
                "node_modules/pkg.pm",
                "target/build/generated.pm",
                ".cache/precompiled.pm",
            ],
        )?;

        let outcome = walk_discovery(root, Instant::now());

        assert_eq!(outcome.method, DiscoveryMethod::Walk);
        assert_eq!(outcome.files.len(), 1);
        assert!(outcome.files[0].ends_with("lib/Foo.pm"));
        Ok(())
    }

    #[test]
    fn should_skip_dir_matches_conventional_noise_directories() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        for name in [".git", "node_modules", "src"] {
            fs::create_dir_all(root.join(name))?;
        }

        // Record which of the interesting directories were actually visited
        // so that a silently empty walk cannot pass the test.
        let mut visited = Vec::new();
        for child in direct_children(root) {
            let name = child.file_name().to_string_lossy().into_owned();
            match name.as_str() {
                ".git" | "node_modules" => assert!(should_skip_dir(&child)),
                "src" => assert!(!should_skip_dir(&child)),
                _ => continue,
            }
            visited.push(name);
        }

        for expected in [".git", "node_modules", "src"] {
            assert!(visited.iter().any(|name| name.as_str() == expected));
        }
        Ok(())
    }

    // ---- parse_git_ls_files_output: edge cases ----

    #[test]
    fn parse_git_output_empty_input_returns_nothing() {
        let (perl_files, skipped) = parse_git_ls_files_output(Path::new(WORKSPACE), b"");

        assert!(perl_files.is_empty());
        assert_eq!(skipped, 0);
    }

    #[test]
    fn parse_git_output_only_null_separators() {
        let (perl_files, skipped) = parse_git_ls_files_output(Path::new(WORKSPACE), b"\0\0\0");

        assert!(perl_files.is_empty());
        assert_eq!(skipped, 0);
    }

    #[test]
    fn parse_git_output_recognizes_all_perl_extensions() {
        let listing = b"lib/Foo.pm\0scripts/run.pl\0t/basic.t\0app/main.psgi\0";

        let (perl_files, skipped) = parse_git_ls_files_output(Path::new(WORKSPACE), listing);

        assert_eq!(perl_files.len(), 4);
        for expected in ["Foo.pm", "run.pl", "basic.t", "main.psgi"] {
            assert!(perl_files.iter().any(|found| found.ends_with(expected)));
        }
        assert_eq!(skipped, 0);
    }

    #[test]
    fn parse_git_output_counts_non_perl_as_excluded() {
        let listing = b"README.md\0Makefile\0config.yaml\0";

        let (perl_files, skipped) = parse_git_ls_files_output(Path::new(WORKSPACE), listing);

        assert!(perl_files.is_empty());
        assert_eq!(skipped, 3);
    }

    #[test]
    fn parse_git_output_excludes_all_skipped_directories() {
        let listing = b".git/hooks/pre-commit.pl\0.hg/config.pm\0.svn/entries.pm\0target/out.pm\0node_modules/dep.pm\0.cache/fast.pm\0";

        let (perl_files, skipped) = parse_git_ls_files_output(Path::new(WORKSPACE), listing);

        assert!(perl_files.is_empty());
        assert_eq!(skipped, 6);
    }

    #[test]
    fn parse_git_output_joins_root_to_relative_paths() {
        let project = Path::new("/home/user/project");

        let (perl_files, _) = parse_git_ls_files_output(project, b"lib/Module.pm\0");

        assert_eq!(perl_files.len(), 1);
        assert_eq!(perl_files[0], Path::new("/home/user/project/lib/Module.pm"));
    }

    // ---- path_contains_skipped_component ----

    #[test]
    fn skipped_component_detects_each_directory_individually() {
        for dir in [".git", ".hg", ".svn", "target", "node_modules", ".cache"] {
            let candidate = format!("lib/{dir}/nested.pm");
            assert!(
                path_contains_skipped_component(Path::new(&candidate)),
                "expected {dir} to be skipped"
            );
        }
    }

    #[test]
    fn skipped_component_allows_safe_directories() {
        for dir in ["lib", "src", "bin", "t", "scripts", "blib"] {
            let candidate = format!("{dir}/Module.pm");
            assert!(
                !path_contains_skipped_component(Path::new(&candidate)),
                "expected {dir} to be allowed"
            );
        }
    }

    #[test]
    fn skipped_component_empty_path_returns_false() {
        let empty = Path::new("");
        assert!(!path_contains_skipped_component(empty));
    }

    #[test]
    fn skipped_component_single_filename_returns_false() {
        let bare = Path::new("Module.pm");
        assert!(!path_contains_skipped_component(bare));
    }

    #[test]
    fn skipped_component_deeply_nested() {
        let buried = Path::new("a/b/c/node_modules/d/e/f.pm");
        assert!(path_contains_skipped_component(buried));
    }

    // ---- walk_discovery ----

    #[test]
    fn walk_discovery_empty_directory() -> TestResult {
        let sandbox = tempfile::tempdir()?;

        let outcome = walk_discovery(sandbox.path(), Instant::now());

        assert_eq!(outcome.method, DiscoveryMethod::Walk);
        assert!(outcome.files.is_empty());
        assert_eq!(outcome.excluded_count, 0);
        Ok(())
    }

    #[test]
    fn walk_discovery_only_non_perl_files() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        create_files(root, &["README.md", "Makefile", "config.yaml"])?;

        let outcome = walk_discovery(root, Instant::now());

        assert_eq!(outcome.method, DiscoveryMethod::Walk);
        assert!(outcome.files.is_empty());
        assert_eq!(outcome.excluded_count, 3);
        Ok(())
    }

    #[test]
    fn walk_discovery_finds_all_perl_extensions() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        create_files(root, &["lib/Foo.pm", "bin/run.pl", "t/basic.t", "app/main.psgi"])?;

        let outcome = walk_discovery(root, Instant::now());

        assert_eq!(outcome.files.len(), 4);
        Ok(())
    }

    #[test]
    fn walk_discovery_deeply_nested_perl_files() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        create_files(root, &["a/b/c/d/e/Deep.pm", "x/y/z/script.pl"])?;

        let outcome = walk_discovery(root, Instant::now());

        assert_eq!(outcome.files.len(), 2);
        for expected in ["Deep.pm", "script.pl"] {
            assert!(outcome.files.iter().any(|found| found.ends_with(expected)));
        }
        Ok(())
    }

    #[test]
    fn walk_discovery_skips_all_six_noise_directories() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        create_files(
            root,
            &[
                ".git/hooks/hook.pm",
                ".hg/config.pm",
                ".svn/entries.pm",
                "target/build/out.pm",
                "node_modules/dep.pm",
                ".cache/fast.pm",
                "lib/Visible.pm",
            ],
        )?;

        let outcome = walk_discovery(root, Instant::now());

        assert_eq!(outcome.files.len(), 1);
        assert!(outcome.files[0].ends_with("lib/Visible.pm"));
        Ok(())
    }

    #[test]
    fn walk_discovery_records_duration() -> TestResult {
        let sandbox = tempfile::tempdir()?;

        let outcome = walk_discovery(sandbox.path(), Instant::now());

        // Only checks that a duration is present and readable.
        let _ = outcome.duration.as_nanos();
        Ok(())
    }

    #[test]
    fn walk_discovery_ignores_subdirectories_themselves() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        // A directory whose name looks like a Perl module must not be reported.
        fs::create_dir_all(root.join("lib/Fake.pm/nested"))?;
        create_file(root, "lib/Real.pm")?;

        let outcome = walk_discovery(root, Instant::now());

        assert_eq!(outcome.files.len(), 1);
        assert!(outcome.files[0].ends_with("lib/Real.pm"));
        Ok(())
    }

    // ---- should_skip_dir ----

    #[test]
    fn should_skip_dir_returns_false_for_files() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        // A regular file whose name starts like a skipped directory name.
        fs::write(root.join("target.txt"), "data")?;

        for child in direct_children(root) {
            if child.file_type().is_file() {
                assert!(!should_skip_dir(&child));
            }
        }
        Ok(())
    }

    #[test]
    fn should_skip_dir_covers_all_six_directories() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        let noise = [".git", ".hg", ".svn", "target", "node_modules", ".cache"];
        for dir in noise {
            fs::create_dir_all(root.join(dir))?;
        }

        let mut matched = 0usize;
        for child in direct_children(root) {
            if !child.file_type().is_dir() {
                continue;
            }
            let label = child.file_name().to_string_lossy();
            let name: &str = &label;
            if noise.contains(&name) {
                assert!(should_skip_dir(&child), "expected {name} to be skipped");
                matched += 1;
            }
        }

        assert_eq!(matched, noise.len());
        Ok(())
    }

    // ---- DiscoveryMethod ----

    #[test]
    fn discovery_method_debug_and_equality() {
        let (git, walk, other_git) =
            (DiscoveryMethod::Git, DiscoveryMethod::Walk, DiscoveryMethod::Git);

        assert_eq!(git, other_git);
        assert_ne!(git, walk);
        // Formatting must simply succeed for both variants.
        let _ = format!("{git:?}");
        let _ = format!("{walk:?}");
    }

    #[test]
    fn discovery_method_clone_and_copy() {
        let original = DiscoveryMethod::Git;
        let first_copy = original;
        let second_copy = original;

        assert_eq!(original, first_copy);
        assert_eq!(original, second_copy);
    }

    // ---- DiscoveryResult ----

    #[test]
    fn discovery_result_clone_and_debug() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        create_file(root, "lib/Foo.pm")?;

        let outcome = walk_discovery(root, Instant::now());
        let duplicate = outcome.clone();

        assert_eq!(duplicate.files.len(), outcome.files.len());
        assert_eq!(duplicate.method, outcome.method);
        assert_eq!(duplicate.excluded_count, outcome.excluded_count);
        let _ = format!("{outcome:?}");
        Ok(())
    }

    // ---- mixed content ----

    #[test]
    fn walk_discovery_mixed_content_accurate_counts() -> TestResult {
        let sandbox = tempfile::tempdir()?;
        let root = sandbox.path();
        // Three Perl sources followed by two unrelated files.
        create_files(root, &["lib/A.pm", "bin/b.pl", "t/c.t", "README.md", "Makefile"])?;

        let outcome = walk_discovery(root, Instant::now());

        assert_eq!(outcome.files.len(), 3);
        assert_eq!(outcome.excluded_count, 2);
        Ok(())
    }

    #[test]
    fn parse_git_output_mixed_content_accurate_counts() {
        let listing =
            b"lib/A.pm\0bin/b.pl\0t/c.t\0app/d.psgi\0README.md\0Makefile\0node_modules/e.pm\0";

        let (perl_files, skipped) = parse_git_ls_files_output(Path::new(WORKSPACE), listing);

        assert_eq!(perl_files.len(), 4);
        assert_eq!(skipped, 3);
    }
}