1#![doc = include_str!("../README.md")]
2
3use ignore::gitignore::GitignoreBuilder;
4use ignore::Match;
5use std::collections::HashSet;
6use std::fs;
7use std::io::{self, BufReader, Read};
8use std::path::{Path, PathBuf};
9use std::process::{Command, Output};
10use thiserror::Error;
11use walkdir::WalkDir;
12
13use log::{debug, error, info, warn};
15
/// Settings that control a single [`grab_contents`] run.
///
/// Derives `PartialEq`/`Eq` so configurations can be compared directly
/// (for example in tests or when caching results).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GrabConfig {
    /// Directory to grab from. It is canonicalized before use and serves as
    /// the working directory for Git detection and as the walk root in
    /// Non-Git mode.
    pub target_path: PathBuf,

    /// When `true`, each file's content is preceded by a
    /// `--- FILE: <path> ---` line.
    pub add_headers: bool,

    /// Patterns for files to leave out. In Git mode each pattern is passed to
    /// `git ls-files` as a `:!<pattern>` pathspec; in Non-Git mode each one is
    /// added as a gitignore-style line.
    pub exclude_patterns: Vec<String>,

    /// In Git mode, also list untracked files via
    /// `git ls-files --others --exclude-standard`. Not consulted in Non-Git
    /// mode.
    pub include_untracked: bool,
}
48
/// Errors that the grab operation can report.
#[derive(Error, Debug)]
pub enum GrabError {
    /// The configured target path could not be canonicalized because it does
    /// not exist (`io::ErrorKind::NotFound`).
    #[error("Target path not found or not accessible: {0}")]
    TargetPathNotFound(PathBuf),

    /// An I/O failure other than "not found" occurred while resolving a path.
    #[error("IO error accessing path '{path}': {source}")]
    IoError {
        /// Path that was being accessed.
        path: PathBuf,
        /// Underlying I/O error.
        #[source]
        source: io::Error,
    },

    /// A git command was started but exited unsuccessfully in a way the
    /// caller does not treat as "not a repository".
    #[error("Failed to execute git command: {command:?}\n stderr: {stderr}\n stdout: {stdout}")]
    GitCommandError {
        /// Human-readable command line that was run.
        command: String,
        /// Captured standard error (lossily decoded).
        stderr: String,
        /// Captured standard output (lossily decoded).
        stdout: String,
    },

    /// The command could not be spawned at all (for example, the executable
    /// is missing).
    #[error("Failed to run git command '{command}': {source}")]
    GitExecutionError {
        /// Human-readable command line that was attempted.
        command: String,
        /// Underlying spawn error.
        #[source]
        source: io::Error,
    },

    /// A file was not valid UTF-8.
    // NOTE(review): not constructed in the visible source; non-UTF8 files are
    // currently skipped with a warning instead.
    #[error("Failed to read non-UTF8 file: {0}")]
    NonUtf8File(PathBuf),

    /// The repository root could not be determined.
    // NOTE(review): not constructed in the visible source.
    #[error("Could not determine repository root for: {0}")]
    RepoRootNotFound(PathBuf),

    /// The exclude-pattern matcher could not be built.
    #[error("Failed to build glob pattern matcher: {0}")]
    GlobMatcherBuildError(#[source] ignore::Error),

    /// A directory walk failed.
    // NOTE(review): not constructed in the visible source; walk errors are
    // currently logged and skipped.
    #[error("Error walking directory {path_display}: {source_str}")]
    WalkdirError {
        /// Display form of the path near which the walk failed.
        path_display: String,
        /// Display form of the underlying error.
        source_str: String,
    },
}
120
/// Convenience alias for a `Result` whose error type is [`GrabError`].
pub type GrabResult<T> = Result<T, GrabError>;
123
124pub fn grab_contents(config: &GrabConfig) -> GrabResult<String> {
201 info!("Starting dirgrab operation with config: {:?}", config);
202
203 let target_path = config.target_path.canonicalize().map_err(|e| {
205 if e.kind() == io::ErrorKind::NotFound {
207 GrabError::TargetPathNotFound(config.target_path.clone())
208 } else {
209 GrabError::IoError {
210 path: config.target_path.clone(),
211 source: e,
212 }
213 }
214 })?;
215 debug!("Canonical target path: {:?}", target_path);
216
217 let git_repo_root = detect_git_repo(&target_path)?;
219
220 let files_to_process = match &git_repo_root {
222 Some(root) => {
223 info!("Operating in Git mode. Repo root: {:?}", root);
224 list_files_git(root, config)?
225 }
226 None => {
227 info!("Operating in Non-Git mode. Target path: {:?}", target_path);
228 list_files_walkdir(&target_path, config)?
229 }
230 };
231
232 info!("Found {} files to process.", files_to_process.len());
233 if files_to_process.is_empty() {
234 warn!("No files selected for processing based on current configuration.");
235 return Ok(String::new()); }
237
238 process_files(
240 &files_to_process,
241 config.add_headers,
242 git_repo_root.as_deref(),
243 )
244}
245
246fn detect_git_repo(path: &Path) -> GrabResult<Option<PathBuf>> {
252 let command_str = "git rev-parse --show-toplevel";
253 debug!(
254 "Detecting git repo by running '{}' in path: {:?}",
255 command_str, path
256 );
257
258 let output = match run_command("git", &["rev-parse", "--show-toplevel"], path) {
260 Ok(output) => output,
261 Err(GrabError::GitExecutionError { ref source, .. })
262 if source.kind() == io::ErrorKind::NotFound =>
263 {
264 info!("'git' command not found. Assuming Non-Git mode.");
266 return Ok(None);
267 }
268 Err(e) => return Err(e), };
270
271 if output.status.success() {
272 let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
273 if !stdout.is_empty() {
274 let root_path =
276 PathBuf::from(stdout)
277 .canonicalize()
278 .map_err(|e| GrabError::IoError {
279 path: PathBuf::from("detected git root"),
280 source: e,
281 })?;
282 debug!("Detected Git repo root: {:?}", root_path);
283 Ok(Some(root_path))
284 } else {
285 warn!(
287 "'{}' succeeded but returned empty output in {:?}. Treating as Non-Git mode.",
288 command_str, path
289 );
290 Ok(None)
291 }
292 } else {
293 let stderr = String::from_utf8_lossy(&output.stderr);
294 if stderr.contains("not a git repository")
296 || stderr.contains("fatal: detected dubious ownership in repository at")
297 {
298 debug!(
299 "Path is not inside a Git repository (based on stderr): {:?}",
300 path
301 );
302 Ok(None)
303 } else {
304 let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
306 error!(
307 "Git command '{}' failed unexpectedly.\nStderr: {}\\nStdout: {}",
308 command_str, stderr, stdout
309 );
310 Err(GrabError::GitCommandError {
311 command: command_str.to_string(),
312 stderr: stderr.into_owned(),
313 stdout,
314 })
315 }
316 }
317}
318
319fn list_files_git(repo_root: &Path, config: &GrabConfig) -> GrabResult<Vec<PathBuf>> {
321 debug!("Listing files using Git in root: {:?}", repo_root);
322
323 let base_args = ["ls-files", "-z"]; let exclude_pathspecs: Vec<String> = config
325 .exclude_patterns
326 .iter()
327 .map(|p| format!(":!{}", p)) .collect();
329 let exclude_refs: Vec<&str> = exclude_pathspecs.iter().map(AsRef::as_ref).collect();
330
331 let mut combined_files = HashSet::new(); let mut tracked_args = base_args.to_vec();
335 tracked_args.extend_from_slice(&exclude_refs);
336 let tracked_command_str = format!("git {}", tracked_args.join(" "));
337 debug!(
338 "Running git command for tracked files: {}",
339 tracked_command_str
340 );
341 let tracked_output = run_command("git", &tracked_args, repo_root)?;
342 if !tracked_output.status.success() {
343 let stderr = String::from_utf8_lossy(&tracked_output.stderr).into_owned();
344 let stdout = String::from_utf8_lossy(&tracked_output.stdout).into_owned();
345 error!(
346 "git ls-files command (tracked) failed.\nStderr: {}\nStdout: {}",
347 stderr, stdout
348 );
349 return Err(GrabError::GitCommandError {
350 command: tracked_command_str,
351 stderr,
352 stdout,
353 });
354 }
355 String::from_utf8_lossy(&tracked_output.stdout)
357 .split('\0')
358 .filter(|s| !s.is_empty())
359 .for_each(|s| {
360 combined_files.insert(repo_root.join(s));
361 });
362
363 if config.include_untracked {
365 let mut untracked_args = base_args.to_vec();
366 untracked_args.push("--others"); untracked_args.push("--exclude-standard"); untracked_args.extend_from_slice(&exclude_refs); let untracked_command_str = format!("git {}", untracked_args.join(" "));
370 debug!(
371 "Running git command for untracked files: {}",
372 untracked_command_str
373 );
374 let untracked_output = run_command("git", &untracked_args, repo_root)?;
375
376 if !untracked_output.status.success() {
377 let stderr = String::from_utf8_lossy(&untracked_output.stderr).into_owned();
378 let stdout = String::from_utf8_lossy(&untracked_output.stdout).into_owned();
379 error!(
380 "git ls-files command (untracked) failed.\nStderr: {}\nStdout: {}",
381 stderr, stdout
382 );
383 return Err(GrabError::GitCommandError {
384 command: untracked_command_str,
385 stderr,
386 stdout,
387 });
388 }
389 String::from_utf8_lossy(&untracked_output.stdout)
391 .split('\0')
392 .filter(|s| !s.is_empty())
393 .for_each(|s| {
394 combined_files.insert(repo_root.join(s));
395 });
396 }
397
398 let files_vec = combined_files.into_iter().collect();
400 Ok(files_vec)
401}
402
403fn list_files_walkdir(target_path: &Path, config: &GrabConfig) -> GrabResult<Vec<PathBuf>> {
405 debug!("Listing files using walkdir starting at: {:?}", target_path);
406 let mut files = Vec::new();
407
408 let mut exclude_builder = GitignoreBuilder::new(target_path);
410 for pattern in &config.exclude_patterns {
411 if let Err(e) = exclude_builder.add_line(None, pattern) {
412 error!(
414 "Failed to add exclude pattern '{}': {}. This pattern will be ignored.",
415 pattern, e
416 );
417 }
418 }
419 let exclude_matcher = exclude_builder
420 .build()
421 .map_err(GrabError::GlobMatcherBuildError)?;
422
423 for entry_result in WalkDir::new(target_path) {
425 let entry = match entry_result {
426 Ok(entry) => entry,
427 Err(e) => {
428 let path_display = e.path().map_or_else(
430 || target_path.display().to_string(),
431 |p| p.display().to_string(),
432 );
433 warn!(
434 "Skipping path due to error during walk near {}: {}",
435 path_display, e
436 );
437 continue;
438 }
439 };
440
441 let path = entry.path();
442
443 if !entry.file_type().is_file() {
445 continue;
446 }
447
448 match exclude_matcher.matched_path_or_any_parents(path, false) {
450 Match::None | Match::Whitelist(_) => {
451 files.push(path.to_path_buf());
453 }
454 Match::Ignore(_) => {
455 debug!("Excluding file due to pattern (walkdir): {:?} matching pattern for path or parent", path);
457 continue; }
459 }
460 } Ok(files)
463}
464
465fn process_files(
468 files: &[PathBuf],
469 add_headers: bool,
470 repo_root: Option<&Path>,
471) -> GrabResult<String> {
472 debug!("Processing {} files.", files.len());
473 let mut combined_content = String::with_capacity(files.len() * 1024); let mut buffer = Vec::new(); for file_path in files {
477 debug!("Processing file: {:?}", file_path);
478
479 if add_headers {
481 let display_path = repo_root
483 .and_then(|root| file_path.strip_prefix(root).ok()) .unwrap_or(file_path); combined_content.push_str(&format!("--- FILE: {} ---\n", display_path.display()));
487 }
488
489 buffer.clear(); match fs::File::open(file_path) {
492 Ok(file) => {
493 let mut reader = BufReader::new(file);
494 match reader.read_to_end(&mut buffer) {
495 Ok(_) => {
496 match String::from_utf8(buffer.clone()) {
498 Ok(content) => {
500 combined_content.push_str(&content);
501 if !content.ends_with('\n') {
503 combined_content.push('\n');
504 }
505 combined_content.push('\n');
507 }
508 Err(_) => {
509 warn!("Skipping non-UTF8 file: {:?}", file_path);
511 }
512 }
513 }
514 Err(e) => {
515 warn!("Skipping file due to read error: {:?} - {}", file_path, e);
517 }
518 }
519 }
520 Err(e) => {
521 warn!("Skipping file due to open error: {:?} - {}", file_path, e);
523 }
524 }
525 }
526
527 Ok(combined_content)
528}
529
530fn run_command(cmd: &str, args: &[&str], current_dir: &Path) -> GrabResult<Output> {
532 debug!(
533 "Running command: {} {:?} in directory: {:?}",
534 cmd, args, current_dir
535 );
536 let output = Command::new(cmd)
537 .args(args)
538 .current_dir(current_dir) .output()
540 .map_err(|e| {
542 let command_string = format!("{} {}", cmd, args.join(" "));
543 if e.kind() == std::io::ErrorKind::NotFound {
544 error!(
546 "Command '{}' not found. Is '{}' installed and in your system's PATH?",
547 command_string, cmd
548 );
549 }
550 GrabError::GitExecutionError {
552 command: command_string,
553 source: e,
554 }
555 })?;
556
557 Ok(output)
559}
560
// Unit tests for repository detection, file listing (walkdir and Git) and
// file processing. Git-based tests return early when `git` cannot be run.
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;
    use std::collections::HashSet;
    use std::fs::{self};
    use std::path::Path;
    use std::process::Command;
    use tempfile::{tempdir, TempDir};

    // Creates a temporary directory populated with a small fixture tree:
    // two text files at the top level, two files in `subdir/`, and
    // `binary.dat` holding bytes that are not valid UTF-8.
    // Returns the `TempDir` guard (keeps the directory alive) and its path.
    fn setup_test_dir() -> Result<(TempDir, PathBuf)> {
        let dir = tempdir()?;
        let path = dir.path().to_path_buf();

        fs::write(path.join("file1.txt"), "Content of file 1.")?;
        fs::write(path.join("file2.rs"), "fn main() {}")?;
        fs::create_dir(path.join("subdir"))?;
        fs::write(path.join("subdir").join("file3.log"), "Log message.")?;
        fs::write(
            path.join("subdir").join("another.txt"),
            "Another text file.",
        )?;
        fs::write(path.join("binary.dat"), [0x80, 0x81, 0x82])?;

        Ok((dir, path))
    }

    // Turns `path` into a Git repository with one commit.
    // Returns `Ok(false)` when `git` cannot be executed so callers can skip.
    // After the commit, `.gitignore`, `file2.rs` and `subdir/another.txt` are
    // tracked; `untracked.txt` and `ignored.log` are created afterwards and
    // left uncommitted.
    fn setup_git_repo(path: &Path) -> Result<bool> {
        if Command::new("git").arg("--version").output().is_err() {
            eprintln!("WARN: 'git' command not found, skipping Git-related test setup.");
            return Ok(false);
        }

        run_command_test("git", &["init", "-b", "main"], path)?;
        // Local identity so `git commit` works without global configuration.
        run_command_test("git", &["config", "user.email", "test@example.com"], path)?;
        run_command_test("git", &["config", "user.name", "Test User"], path)?;

        // Ignore rules: all `*.log` files, `binary.dat` and `file1.txt`.
        fs::write(path.join(".gitignore"), "*.log\nbinary.dat\nfile1.txt")?;

        run_command_test(
            "git",
            &["add", ".gitignore", "file2.rs", "subdir/another.txt"],
            path,
        )?;
        run_command_test("git", &["commit", "-m", "Initial commit"], path)?;

        // Created after the commit: one plain untracked file, one ignored file.
        fs::write(path.join("untracked.txt"), "This file is not tracked.")?;
        fs::write(path.join("ignored.log"), "This should be ignored by git.")?;
        Ok(true)
    }

    // Runs a command for test setup and fails with the captured output when
    // it exits unsuccessfully.
    fn run_command_test(cmd: &str, args: &[&str], current_dir: &Path) -> Result<Output> {
        println!(
            "Running test command: {} {:?} in {:?}",
            cmd, args, current_dir
        );
        let output = Command::new(cmd)
            .args(args)
            .current_dir(current_dir)
            .output()?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            let stdout = String::from_utf8_lossy(&output.stdout);
            anyhow::bail!(
                "Command failed: {} {:?}\nStatus: {}\nStdout: {}\nStderr: {}",
                cmd,
                args,
                output.status,
                stdout,
                stderr
            );
        }
        Ok(output)
    }

    // Builds the expected set of paths by joining each relative path onto
    // `base_path`.
    fn get_expected_set(base_path: &Path, relative_paths: &[&str]) -> HashSet<PathBuf> {
        relative_paths.iter().map(|p| base_path.join(p)).collect()
    }

    // Compares paths as sets, so the order of `actual` does not matter.
    fn assert_paths_eq(actual: Vec<PathBuf>, expected: HashSet<PathBuf>) {
        let actual_set: HashSet<PathBuf> = actual.into_iter().collect();
        assert_eq!(actual_set, expected);
    }

    // The repo root is detected both from the root itself and from a
    // subdirectory.
    #[test]
    fn test_detect_git_repo_inside() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        if !setup_git_repo(&path)? {
            return Ok(());
        }
        let maybe_root = detect_git_repo(&path)?;
        assert!(maybe_root.is_some(), "Should detect git repo");
        // Canonicalize both sides so symlinked temp directories compare equal.
        assert_eq!(maybe_root.unwrap().canonicalize()?, path.canonicalize()?);

        let subdir_path = path.join("subdir");
        let maybe_root_from_subdir = detect_git_repo(&subdir_path)?;
        assert!(
            maybe_root_from_subdir.is_some(),
            "Should detect git repo from subdir"
        );
        assert_eq!(
            maybe_root_from_subdir.unwrap().canonicalize()?,
            path.canonicalize()?
        );

        Ok(())
    }

    // A plain temporary directory is not reported as a repository.
    // NOTE(review): assumes the temp directory is not itself inside a Git
    // work tree.
    #[test]
    fn test_detect_git_repo_outside() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;

        let maybe_root = detect_git_repo(&path)?;
        assert!(maybe_root.is_none(), "Should not detect git repo");
        Ok(())
    }

    // Without exclude patterns the walk returns every fixture file.
    #[test]
    fn test_list_files_walkdir_no_exclude() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        let config = GrabConfig {
            target_path: path.clone(),
            add_headers: false,
            exclude_patterns: vec![],
            include_untracked: false,
        };

        let files = list_files_walkdir(&path, &config)?;

        let expected_set = get_expected_set(
            &path,
            &[
                "file1.txt",
                "file2.rs",
                "subdir/file3.log",
                "subdir/another.txt",
                "binary.dat",
            ],
        );
        assert_paths_eq(files, expected_set);
        Ok(())
    }

    // Excluding `*.log` and `subdir/` leaves only the top-level files.
    #[test]
    fn test_list_files_walkdir_with_exclude() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        let config = GrabConfig {
            target_path: path.clone(),
            add_headers: false,
            exclude_patterns: vec!["*.log".to_string(), "subdir/".to_string()],
            include_untracked: false,
        };

        let files = list_files_walkdir(&path, &config)?;

        let expected_set = get_expected_set(&path, &["file1.txt", "file2.rs", "binary.dat"]);
        assert_paths_eq(files, expected_set);
        Ok(())
    }

    // With `include_untracked` off, only committed files are listed.
    #[test]
    fn test_list_files_git_tracked_only() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        if !setup_git_repo(&path)? {
            return Ok(());
        }
        let config = GrabConfig {
            target_path: path.clone(),
            add_headers: false,
            exclude_patterns: vec![],
            include_untracked: false,
        };

        let files = list_files_git(&path, &config)?;

        let expected_set =
            get_expected_set(&path, &[".gitignore", "file2.rs", "subdir/another.txt"]);

        println!("Git tracked files found: {:?}", files);
        assert_paths_eq(files, expected_set);
        Ok(())
    }

    // With `include_untracked` on, `untracked.txt` is added; files matched by
    // `.gitignore` are still left out.
    #[test]
    fn test_list_files_git_include_untracked() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        if !setup_git_repo(&path)? {
            return Ok(());
        }
        let config = GrabConfig {
            target_path: path.clone(),
            add_headers: false,
            exclude_patterns: vec![],
            include_untracked: true,
        };

        let files = list_files_git(&path, &config)?;

        let expected_set = get_expected_set(
            &path,
            &[
                ".gitignore",
                "file2.rs",
                "subdir/another.txt",
                "untracked.txt",
            ],
        );

        println!("Git tracked+untracked files found: {:?}", files);
        assert_paths_eq(files, expected_set);
        Ok(())
    }

    // Exclude patterns remove tracked files: `*.rs` and `subdir/` leave only
    // `.gitignore`.
    #[test]
    fn test_list_files_git_with_exclude() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        if !setup_git_repo(&path)? {
            return Ok(());
        }
        let config = GrabConfig {
            target_path: path.clone(),
            add_headers: false,
            exclude_patterns: vec!["*.rs".to_string(), "subdir/".to_string()],
            include_untracked: false,
        };

        let files = list_files_git(&path, &config)?;

        let expected_set = get_expected_set(&path, &[".gitignore"]);

        println!("Git tracked files (with exclude) found: {:?}", files);
        assert_paths_eq(files, expected_set);
        Ok(())
    }

    // Exclude patterns apply to tracked and untracked listings alike:
    // `*.txt` removes both `subdir/another.txt` and `untracked.txt`.
    #[test]
    fn test_list_files_git_untracked_with_exclude() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        if !setup_git_repo(&path)? {
            return Ok(());
        }
        let config = GrabConfig {
            target_path: path.clone(),
            add_headers: false,
            exclude_patterns: vec!["*.txt".to_string()],
            include_untracked: true,
        };

        let files = list_files_git(&path, &config)?;

        let expected_set = get_expected_set(&path, &[".gitignore", "file2.rs"]);

        println!(
            "Git tracked+untracked (with exclude) files found: {:?}",
            files
        );
        assert_paths_eq(files, expected_set);
        Ok(())
    }

    // Without headers, the non-UTF8 `binary.dat` contributes nothing and the
    // two text files are concatenated in input order.
    #[test]
    fn test_process_files_no_headers_skip_binary() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        let files_to_process = vec![
            path.join("file1.txt"),
            path.join("binary.dat"),
            path.join("file2.rs"),
        ];

        let result = process_files(&files_to_process, false, None)?;

        let expected_content = "Content of file 1.\n\nfn main() {}\n\n";

        // Trimmed on both sides, so trailing blank lines are not checked.
        assert_eq!(result.trim(), expected_content.trim());

        Ok(())
    }

    // With headers and a repo root, header paths are shown relative to the
    // root.
    #[test]
    fn test_process_files_with_headers() -> Result<()> {
        let (_dir, path) = setup_test_dir()?;
        let files_to_process = vec![path.join("file1.txt"), path.join("file2.rs")];

        let repo_root = Some(path.as_path());

        let result = process_files(&files_to_process, true, repo_root)?;

        let expected_content = format!(
            "--- FILE: {} ---\nContent of file 1.\n\n--- FILE: {} ---\nfn main() {{}}\n\n",
            Path::new("file1.txt").display(),
            Path::new("file2.rs").display()
        );

        assert_eq!(result.trim(), expected_content.trim());

        Ok(())
    }
}