dirgrab_lib/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use ignore::gitignore::GitignoreBuilder;
4use ignore::Match;
5use std::collections::HashSet;
6use std::fs;
7use std::io::{self, BufReader, Read};
8use std::path::{Path, PathBuf};
9use std::process::{Command, Output};
10use thiserror::Error;
11use walkdir::WalkDir;
12
13// Re-export log macros for convenience if used internally
14use log::{debug, error, info, warn};
15
16// --- Public Configuration Struct ---
17
/// Settings that drive a single `dirgrab` run.
///
/// The caller (for example the CLI front end) fills this in from user input
/// and hands a reference to [`grab_contents`], which decides how files are
/// discovered and how their contents are assembled.
#[derive(Debug, Clone)]
pub struct GrabConfig {
    /// Directory (or path inside a Git repository) to start from.
    /// The path is canonicalized before any other work is done.
    pub target_path: PathBuf,

    /// When `true`, a `'--- FILE: <filename> ---'` line is emitted ahead of
    /// each file's content. The filename is shown relative to the Git
    /// repository root when one was detected; otherwise the file's full path
    /// is shown.
    pub add_headers: bool,

    /// Extra exclusion globs, written in `.gitignore` syntax.
    ///
    /// * Git mode: each pattern is handed to `git ls-files` as a
    ///   `:!<pattern>` pathspec, on top of whatever `.gitignore` already hides.
    /// * Non-Git mode: the patterns filter the entries produced by the
    ///   directory walk.
    pub exclude_patterns: Vec<String>,

    /// Git mode only: also pick up untracked files (present in the working
    /// tree but not in the index). `.gitignore` rules and `exclude_patterns`
    /// still apply to them. Ignored when no Git repository is detected.
    pub include_untracked: bool,
}
48
49// --- Public Error Enum ---
50
51/// Errors that can occur during the `dirgrab` library operations.
52///
53/// These errors cover issues ranging from file system access problems
54/// to Git command failures and configuration errors.
55#[derive(Error, Debug)]
56pub enum GrabError {
57    /// The initial `target_path` provided in the `GrabConfig` was not found
58    /// on the filesystem or was inaccessible due to permissions.
59    #[error("Target path not found or not accessible: {0}")]
60    TargetPathNotFound(PathBuf),
61
62    /// An I/O error occurred while accessing a path during the operation
63    /// (e.g., reading a file, canonicalizing a path).
64    #[error("IO error accessing path '{path}': {source}")]
65    IoError {
66        path: PathBuf,
67        #[source]
68        source: io::Error,
69    },
70
71    /// A `git` command (like `git ls-files` or `git rev-parse`) failed to execute
72    /// successfully, indicated by a non-zero exit status.
73    /// Contains the command string, stderr, and stdout output for debugging.
74    #[error("Failed to execute git command: {command:?}\n  stderr: {stderr}\n  stdout: {stdout}")]
75    GitCommandError {
76        command: String,
77        stderr: String,
78        stdout: String,
79    },
80
81    /// An error occurred while trying to spawn or run the `git` process itself.
82    /// This commonly happens if `git` is not installed or not found in the system's PATH,
83    /// but can also indicate permission errors preventing execution.
84    #[error("Failed to run git command '{command}': {source}")]
85    GitExecutionError {
86        command: String,
87        #[source]
88        source: io::Error,
89    },
90
91    /// A file identified for processing could not be read as valid UTF-8 data.
92    /// This usually indicates a binary file. In the default implementation of
93    /// `grab_contents`, such files are logged as a warning and skipped, rather
94    /// than returning this error directly.
95    #[error("Failed to read non-UTF8 file: {0}")]
96    NonUtf8File(PathBuf),
97
98    /// Although `detect_git_repo` attempts to handle cases where a path is not
99    /// in a repository gracefully (by returning `Ok(None)`), this error might
100    /// occur if an unexpected issue prevents determining the root definitively.
101    /// (Note: Current implementation less likely to return this specific variant).
102    #[error("Could not determine repository root for: {0}")]
103    RepoRootNotFound(PathBuf),
104
105    /// Failed to build the glob pattern matcher from the patterns provided
106    /// in `GrabConfig::exclude_patterns`. This might happen if a pattern has
107    /// invalid syntax according to the `ignore` crate.
108    #[error("Failed to build glob pattern matcher: {0}")]
109    GlobMatcherBuildError(#[source] ignore::Error),
110
111    /// An error occurred during directory traversal when operating in non-Git mode,
112    /// likely related to permissions or accessing a specific directory entry.
113    /// The default behavior logs a warning and skips the problematic entry.
114    #[error("Error walking directory {path_display}: {source_str}")]
115    WalkdirError {
116        path_display: String, // Displayable path near the error
117        source_str: String,   // The underlying error message from walkdir
118    },
119}
120
121/// A convenience type alias for `Result<T, GrabError>`.
122pub type GrabResult<T> = Result<T, GrabError>;
123
124// --- Main Public Function ---
125
126/// Performs the main `dirgrab` operation based on the provided configuration.
127///
128/// This function serves as the primary entry point into the `dirgrab-lib` core logic.
129/// It reads files from the specified target directory, intelligently determining
130/// whether to use Git context (`git ls-files`) or standard directory walking.
131///
132/// It applies exclusion patterns (`.gitignore` implicitly in Git mode, plus explicit
133/// patterns from `GrabConfig`), concatenates the UTF-8 content of the selected files,
134/// and optionally adds headers between file contents.
135///
136/// Non-UTF8/binary files encountered during processing are skipped with a warning message
137/// logged via the `log` crate (level `WARN`). File system errors during reading
138/// individual files are also typically logged as warnings, allowing the process to
139/// continue with other files. More critical errors (like inability to run `git`,
140/// invalid target path, or pattern compilation issues) will result in an `Err` return.
141///
142/// # Arguments
143///
144/// * `config`: A reference to a [`GrabConfig`] struct containing the parameters for
145///   the operation, such as the target path, exclusion rules, and header preferences.
146///
147/// # Returns
148///
149/// * `Ok(String)`: A single `String` containing the concatenated UTF-8 content of
150///   all selected and successfully read files. If no files are selected or readable,
151///   this will be an empty string.
152/// * `Err(GrabError)`: An error occurred that prevented the operation from completing
153///   successfully. See [`GrabError`] for possible variants.
154///
155/// # Errors
156///
157/// This function can return various [`GrabError`] variants, including:
158/// * [`GrabError::TargetPathNotFound`]: If the starting path doesn't exist or is inaccessible.
159/// * [`GrabError::IoError`]: For general I/O issues (e.g., canonicalization).
160/// * [`GrabError::GitCommandError`]: If a `git` command fails unexpectedly.
161/// * [`GrabError::GitExecutionError`]: If the `git` executable cannot be run.
162/// * [`GrabError::GlobMatcherBuildError`]: If exclude patterns are invalid.
163///
164/// Note that errors reading individual files or encountering non-UTF8 files typically
165/// result in logged warnings rather than returning an `Err`, allowing the function
166/// to process as many files as possible.
167///
168/// # Examples
169///
170/// ```no_run
171/// use dirgrab_lib::{GrabConfig, grab_contents, GrabError};
172/// use std::path::PathBuf;
173///
174/// fn run_dirgrab() -> Result<String, GrabError> {
175///     let config = GrabConfig {
176///         target_path: PathBuf::from("./my_project"), // Target a specific project
177///         add_headers: false,                        // Don't include headers
178///         exclude_patterns: vec!["target/".to_string()], // Exclude the target dir
179///         include_untracked: true,                   // Include untracked files if it's a Git repo
180///     };
181///
182///     grab_contents(&config)
183/// }
184///
185/// match run_dirgrab() {
186///     Ok(content) => {
187///         println!("Successfully grabbed content ({} bytes).", content.len());
188///         // Example: copy to clipboard or send to LLM
189///         // use arboard::Clipboard;
190///         // if let Ok(mut ctx) = Clipboard::new() {
191///         //     ctx.set_text(content).expect("Failed to set clipboard");
192///         // }
193///     }
194///     Err(e) => {
195///         eprintln!("Error running dirgrab: {}", e);
196///         // Handle the error appropriately
197///     }
198/// }
199/// ```
200pub fn grab_contents(config: &GrabConfig) -> GrabResult<String> {
201    info!("Starting dirgrab operation with config: {:?}", config);
202
203    // Canonicalize cleans the path and checks existence implicitly via OS call
204    let target_path = config.target_path.canonicalize().map_err(|e| {
205        // Provide a slightly better error if the root cause is NotFound
206        if e.kind() == io::ErrorKind::NotFound {
207            GrabError::TargetPathNotFound(config.target_path.clone())
208        } else {
209            GrabError::IoError {
210                path: config.target_path.clone(),
211                source: e,
212            }
213        }
214    })?;
215    debug!("Canonical target path: {:?}", target_path);
216
217    // 1. Detect Git repository and root
218    let git_repo_root = detect_git_repo(&target_path)?;
219
220    // 2. List files based on mode (Git vs. Non-Git)
221    let files_to_process = match &git_repo_root {
222        Some(root) => {
223            info!("Operating in Git mode. Repo root: {:?}", root);
224            list_files_git(root, config)?
225        }
226        None => {
227            info!("Operating in Non-Git mode. Target path: {:?}", target_path);
228            list_files_walkdir(&target_path, config)?
229        }
230    };
231
232    info!("Found {} files to process.", files_to_process.len());
233    if files_to_process.is_empty() {
234        warn!("No files selected for processing based on current configuration.");
235        return Ok(String::new()); // Return empty string if no files
236    }
237
238    // 3. Process (read and concatenate) the files
239    process_files(
240        &files_to_process,
241        config.add_headers,
242        git_repo_root.as_deref(),
243    )
244}
245
246// --- Helper Function Implementations ---
247// (Private functions below - no public API doc comments needed,
248// but internal // comments can clarify complex logic if necessary)
249
250/// Checks if the path is inside a Git repository and returns the repo root if true.
251fn detect_git_repo(path: &Path) -> GrabResult<Option<PathBuf>> {
252    let command_str = "git rev-parse --show-toplevel";
253    debug!(
254        "Detecting git repo by running '{}' in path: {:?}",
255        command_str, path
256    );
257
258    // Attempt to run git command, handle specific "not found" error gracefully
259    let output = match run_command("git", &["rev-parse", "--show-toplevel"], path) {
260        Ok(output) => output,
261        Err(GrabError::GitExecutionError { ref source, .. })
262            if source.kind() == io::ErrorKind::NotFound =>
263        {
264            // Git command not found, definitely not a Git repo context for us
265            info!("'git' command not found. Assuming Non-Git mode.");
266            return Ok(None);
267        }
268        Err(e) => return Err(e), // Propagate other execution errors
269    };
270
271    if output.status.success() {
272        let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
273        if !stdout.is_empty() {
274            // Attempt to canonicalize the reported root path for consistency
275            let root_path =
276                PathBuf::from(stdout)
277                    .canonicalize()
278                    .map_err(|e| GrabError::IoError {
279                        path: PathBuf::from("detected git root"),
280                        source: e,
281                    })?;
282            debug!("Detected Git repo root: {:?}", root_path);
283            Ok(Some(root_path))
284        } else {
285            // Command succeeded but gave empty output? Unexpected. Treat as non-repo.
286            warn!(
287                "'{}' succeeded but returned empty output in {:?}. Treating as Non-Git mode.",
288                command_str, path
289            );
290            Ok(None)
291        }
292    } else {
293        let stderr = String::from_utf8_lossy(&output.stderr);
294        // Check stderr for common messages indicating not a git repo
295        if stderr.contains("not a git repository")
296            || stderr.contains("fatal: detected dubious ownership in repository at")
297        {
298            debug!(
299                "Path is not inside a Git repository (based on stderr): {:?}",
300                path
301            );
302            Ok(None)
303        } else {
304            // A different git error occurred
305            let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
306            error!(
307                "Git command '{}' failed unexpectedly.\nStderr: {}\\nStdout: {}",
308                command_str, stderr, stdout
309            );
310            Err(GrabError::GitCommandError {
311                command: command_str.to_string(),
312                stderr: stderr.into_owned(),
313                stdout,
314            })
315        }
316    }
317}
318
319/// Lists files using `git ls-files`. Handles tracked and optionally untracked files.
320fn list_files_git(repo_root: &Path, config: &GrabConfig) -> GrabResult<Vec<PathBuf>> {
321    debug!("Listing files using Git in root: {:?}", repo_root);
322
323    let base_args = ["ls-files", "-z"]; // Always use null termination for safe parsing
324    let exclude_pathspecs: Vec<String> = config
325        .exclude_patterns
326        .iter()
327        .map(|p| format!(":!{}", p)) // Format as git pathspec exclusions
328        .collect();
329    let exclude_refs: Vec<&str> = exclude_pathspecs.iter().map(AsRef::as_ref).collect();
330
331    let mut combined_files = HashSet::new(); // Use HashSet for automatic deduplication
332
333    // 1. Get TRACKED files (respecting command-line excludes)
334    let mut tracked_args = base_args.to_vec();
335    tracked_args.extend_from_slice(&exclude_refs);
336    let tracked_command_str = format!("git {}", tracked_args.join(" "));
337    debug!(
338        "Running git command for tracked files: {}",
339        tracked_command_str
340    );
341    let tracked_output = run_command("git", &tracked_args, repo_root)?;
342    if !tracked_output.status.success() {
343        let stderr = String::from_utf8_lossy(&tracked_output.stderr).into_owned();
344        let stdout = String::from_utf8_lossy(&tracked_output.stdout).into_owned();
345        error!(
346            "git ls-files command (tracked) failed.\nStderr: {}\nStdout: {}",
347            stderr, stdout
348        );
349        return Err(GrabError::GitCommandError {
350            command: tracked_command_str,
351            stderr,
352            stdout,
353        });
354    }
355    // Add tracked files found to the set
356    String::from_utf8_lossy(&tracked_output.stdout)
357        .split('\0')
358        .filter(|s| !s.is_empty())
359        .for_each(|s| {
360            combined_files.insert(repo_root.join(s));
361        });
362
363    // 2. Get UNTRACKED files (if requested, respecting .gitignore and command-line excludes)
364    if config.include_untracked {
365        let mut untracked_args = base_args.to_vec();
366        untracked_args.push("--others"); // Show untracked files
367        untracked_args.push("--exclude-standard"); // IMPORTANT: Still respect .gitignore rules
368        untracked_args.extend_from_slice(&exclude_refs); // Apply command-line excludes too
369        let untracked_command_str = format!("git {}", untracked_args.join(" "));
370        debug!(
371            "Running git command for untracked files: {}",
372            untracked_command_str
373        );
374        let untracked_output = run_command("git", &untracked_args, repo_root)?;
375
376        if !untracked_output.status.success() {
377            let stderr = String::from_utf8_lossy(&untracked_output.stderr).into_owned();
378            let stdout = String::from_utf8_lossy(&untracked_output.stdout).into_owned();
379            error!(
380                "git ls-files command (untracked) failed.\nStderr: {}\nStdout: {}",
381                stderr, stdout
382            );
383            return Err(GrabError::GitCommandError {
384                command: untracked_command_str,
385                stderr,
386                stdout,
387            });
388        }
389        // Add untracked files found to the set (duplicates are handled by HashSet)
390        String::from_utf8_lossy(&untracked_output.stdout)
391            .split('\0')
392            .filter(|s| !s.is_empty())
393            .for_each(|s| {
394                combined_files.insert(repo_root.join(s));
395            });
396    }
397
398    // Convert the combined set back to a Vec for the return type
399    let files_vec = combined_files.into_iter().collect();
400    Ok(files_vec)
401}
402
403/// Lists files using `walkdir` when not in a Git repository. Applies command-line excludes.
404fn list_files_walkdir(target_path: &Path, config: &GrabConfig) -> GrabResult<Vec<PathBuf>> {
405    debug!("Listing files using walkdir starting at: {:?}", target_path);
406    let mut files = Vec::new();
407
408    // Build the matcher for command-line exclusion patterns
409    let mut exclude_builder = GitignoreBuilder::new(target_path);
410    for pattern in &config.exclude_patterns {
411        if let Err(e) = exclude_builder.add_line(None, pattern) {
412            // Log error for invalid pattern but continue, ignoring the bad pattern
413            error!(
414                "Failed to add exclude pattern '{}': {}. This pattern will be ignored.",
415                pattern, e
416            );
417        }
418    }
419    let exclude_matcher = exclude_builder
420        .build()
421        .map_err(GrabError::GlobMatcherBuildError)?;
422
423    // Walk the directory
424    for entry_result in WalkDir::new(target_path) {
425        let entry = match entry_result {
426            Ok(entry) => entry,
427            Err(e) => {
428                // Log errors during walk (e.g., permission denied) and skip the entry
429                let path_display = e.path().map_or_else(
430                    || target_path.display().to_string(),
431                    |p| p.display().to_string(),
432                );
433                warn!(
434                    "Skipping path due to error during walk near {}: {}",
435                    path_display, e
436                );
437                continue;
438            }
439        };
440
441        let path = entry.path();
442
443        // Only process files
444        if !entry.file_type().is_file() {
445            continue;
446        }
447
448        // Apply exclusion rules using the command-line patterns
449        match exclude_matcher.matched_path_or_any_parents(path, false) {
450            Match::None | Match::Whitelist(_) => {
451                // Not ignored by --exclude patterns, add it
452                files.push(path.to_path_buf());
453            }
454            Match::Ignore(_) => {
455                // Ignored by an --exclude pattern
456                debug!("Excluding file due to pattern (walkdir): {:?} matching pattern for path or parent", path);
457                continue; // Skip this file
458            }
459        }
460    } // End walkdir loop
461
462    Ok(files)
463}
464
465/// Reads a list of files, concatenates their UTF-8 content, optionally adding headers.
466/// Skips non-UTF8 files and files with read errors, logging warnings.
467fn process_files(
468    files: &[PathBuf],
469    add_headers: bool,
470    repo_root: Option<&Path>,
471) -> GrabResult<String> {
472    debug!("Processing {} files.", files.len());
473    let mut combined_content = String::with_capacity(files.len() * 1024); // Preallocate estimate
474    let mut buffer = Vec::new(); // Reusable buffer for reading
475
476    for file_path in files {
477        debug!("Processing file: {:?}", file_path);
478
479        // --- Add Header if requested ---
480        if add_headers {
481            // Try to make the path relative to the repo root (if in git mode) or the original target path
482            let display_path = repo_root
483                .and_then(|root| file_path.strip_prefix(root).ok()) // Attempt strip_prefix if root exists
484                .unwrap_or(file_path); // Fallback to the full path if not relative or strip fails
485
486            combined_content.push_str(&format!("--- FILE: {} ---\n", display_path.display()));
487        }
488
489        // --- Read File Content ---
490        buffer.clear(); // Reuse the buffer
491        match fs::File::open(file_path) {
492            Ok(file) => {
493                let mut reader = BufReader::new(file);
494                match reader.read_to_end(&mut buffer) {
495                    Ok(_) => {
496                        // Attempt to decode as UTF-8
497                        match String::from_utf8(buffer.clone()) {
498                            // Clone needed as buffer is reused
499                            Ok(content) => {
500                                combined_content.push_str(&content);
501                                // Ensure separation with a newline, even if file doesn't end with one
502                                if !content.ends_with('\n') {
503                                    combined_content.push('\n');
504                                }
505                                // Add an extra newline between files for readability
506                                combined_content.push('\n');
507                            }
508                            Err(_) => {
509                                // File is not valid UTF-8 (likely binary)
510                                warn!("Skipping non-UTF8 file: {:?}", file_path);
511                            }
512                        }
513                    }
514                    Err(e) => {
515                        // Error reading file content
516                        warn!("Skipping file due to read error: {:?} - {}", file_path, e);
517                    }
518                }
519            }
520            Err(e) => {
521                // Error opening file (e.g., permissions changed since listing)
522                warn!("Skipping file due to open error: {:?} - {}", file_path, e);
523            }
524        }
525    }
526
527    Ok(combined_content)
528}
529
530/// Utility function to run an external command and capture its output.
531fn run_command(cmd: &str, args: &[&str], current_dir: &Path) -> GrabResult<Output> {
532    debug!(
533        "Running command: {} {:?} in directory: {:?}",
534        cmd, args, current_dir
535    );
536    let output = Command::new(cmd)
537        .args(args)
538        .current_dir(current_dir) // Execute in the specified directory
539        .output()
540        // Map I/O errors during execution (like command not found)
541        .map_err(|e| {
542            let command_string = format!("{} {}", cmd, args.join(" "));
543            if e.kind() == std::io::ErrorKind::NotFound {
544                // Provide a specific error message if the command wasn't found
545                error!(
546                    "Command '{}' not found. Is '{}' installed and in your system's PATH?",
547                    command_string, cmd
548                );
549            }
550            // Wrap the error in our custom type
551            GrabError::GitExecutionError {
552                command: command_string,
553                source: e,
554            }
555        })?;
556
557    // Return the captured output (caller checks status code)
558    Ok(output)
559}
560
561// --- Tests ---
562#[cfg(test)]
563mod tests {
564    use super::*;
565    use anyhow::Result;
566    use std::collections::HashSet; // For order-independent comparison
567    use std::fs::{self};
568    use std::path::Path; // Ensure Path is imported directly for test cases
569    use std::process::Command;
570    use tempfile::{tempdir, TempDir};
571
572    // Helper function to create a basic temporary directory setup
573    fn setup_test_dir() -> Result<(TempDir, PathBuf)> {
574        let dir = tempdir()?;
575        let path = dir.path().to_path_buf();
576
577        fs::write(path.join("file1.txt"), "Content of file 1.")?;
578        fs::write(path.join("file2.rs"), "fn main() {}")?;
579        fs::create_dir(path.join("subdir"))?;
580        fs::write(path.join("subdir").join("file3.log"), "Log message.")?;
581        fs::write(
582            path.join("subdir").join("another.txt"),
583            "Another text file.",
584        )?;
585        fs::write(path.join("binary.dat"), [0x80, 0x81, 0x82])?;
586
587        Ok((dir, path))
588    }
589
590    // Helper function to initialize a git repo in a temp dir
591    // Returns Ok(true) if git repo was set up, Ok(false) if git command failed (e.g., not found)
592    fn setup_git_repo(path: &Path) -> Result<bool> {
593        if Command::new("git").arg("--version").output().is_err() {
594            eprintln!("WARN: 'git' command not found, skipping Git-related test setup.");
595            return Ok(false); // Indicate git is not available
596        }
597
598        run_command_test("git", &["init", "-b", "main"], path)?;
599        run_command_test("git", &["config", "user.email", "test@example.com"], path)?;
600        run_command_test("git", &["config", "user.name", "Test User"], path)?;
601
602        // Add .gitignore *before* adding files
603        // Ignore logs, binary.dat, and specifically file1.txt
604        fs::write(path.join(".gitignore"), "*.log\nbinary.dat\nfile1.txt")?;
605
606        run_command_test(
607            "git",
608            &["add", ".gitignore", "file2.rs", "subdir/another.txt"],
609            path,
610        )?; // Add specific files + .gitignore
611            // Note: file1.txt, binary.dat, subdir/file3.log are NOT added initially
612
613        run_command_test("git", &["commit", "-m", "Initial commit"], path)?;
614
615        // Create an untracked file (that isn't ignored)
616        fs::write(path.join("untracked.txt"), "This file is not tracked.")?;
617        // Create an explicitly ignored file
618        fs::write(path.join("ignored.log"), "This should be ignored by git.")?; // Matches *.log
619
620        Ok(true) // Indicate git setup success
621    }
622
623    // Helper to run commands specifically for tests, panicking on failure
624    fn run_command_test(cmd: &str, args: &[&str], current_dir: &Path) -> Result<Output> {
625        println!(
626            "Running test command: {} {:?} in {:?}",
627            cmd, args, current_dir
628        );
629        let output = Command::new(cmd)
630            .args(args)
631            .current_dir(current_dir)
632            .output()?;
633
634        if !output.status.success() {
635            let stderr = String::from_utf8_lossy(&output.stderr);
636            let stdout = String::from_utf8_lossy(&output.stdout);
637            anyhow::bail!(
638                "Command failed: {} {:?}\nStatus: {}\nStdout: {}\nStderr: {}",
639                cmd,
640                args,
641                output.status,
642                stdout,
643                stderr
644            );
645        }
646        Ok(output)
647    }
648
649    // Helper to convert lists of relative paths to absolute paths in the test repo
650    // and then into a HashSet for comparison.
651    fn get_expected_set(base_path: &Path, relative_paths: &[&str]) -> HashSet<PathBuf> {
652        relative_paths.iter().map(|p| base_path.join(p)).collect()
653    }
654
655    fn assert_paths_eq(actual: Vec<PathBuf>, expected: HashSet<PathBuf>) {
656        let actual_set: HashSet<PathBuf> = actual.into_iter().collect();
657        assert_eq!(actual_set, expected);
658    }
659
660    #[test]
661    fn test_detect_git_repo_inside() -> Result<()> {
662        let (_dir, path) = setup_test_dir()?;
663        if !setup_git_repo(&path)? {
664            return Ok(());
665        } // Skip if git not available
666
667        let maybe_root = detect_git_repo(&path)?;
668        assert!(maybe_root.is_some(), "Should detect git repo");
669        assert_eq!(maybe_root.unwrap().canonicalize()?, path.canonicalize()?);
670
671        let subdir_path = path.join("subdir");
672        let maybe_root_from_subdir = detect_git_repo(&subdir_path)?;
673        assert!(
674            maybe_root_from_subdir.is_some(),
675            "Should detect git repo from subdir"
676        );
677        assert_eq!(
678            maybe_root_from_subdir.unwrap().canonicalize()?,
679            path.canonicalize()?
680        );
681
682        Ok(())
683    }
684
685    #[test]
686    fn test_detect_git_repo_outside() -> Result<()> {
687        let (_dir, path) = setup_test_dir()?;
688
689        let maybe_root = detect_git_repo(&path)?;
690        assert!(maybe_root.is_none(), "Should not detect git repo");
691        Ok(())
692    }
693
694    #[test]
695    fn test_list_files_walkdir_no_exclude() -> Result<()> {
696        let (_dir, path) = setup_test_dir()?;
697        let config = GrabConfig {
698            target_path: path.clone(),
699            add_headers: false,
700            exclude_patterns: vec![],
701            include_untracked: false,
702        };
703
704        let files = list_files_walkdir(&path, &config)?;
705
706        let expected_set = get_expected_set(
707            &path,
708            &[
709                "file1.txt",
710                "file2.rs",
711                "subdir/file3.log",
712                "subdir/another.txt",
713                "binary.dat",
714            ],
715        );
716        assert_paths_eq(files, expected_set);
717        Ok(())
718    }
719
720    #[test]
721    fn test_list_files_walkdir_with_exclude() -> Result<()> {
722        let (_dir, path) = setup_test_dir()?;
723        let config = GrabConfig {
724            target_path: path.clone(),
725            add_headers: false,
726            exclude_patterns: vec!["*.log".to_string(), "subdir/".to_string()],
727            include_untracked: false,
728        };
729
730        let files = list_files_walkdir(&path, &config)?;
731
732        let expected_set = get_expected_set(&path, &["file1.txt", "file2.rs", "binary.dat"]);
733        assert_paths_eq(files, expected_set);
734        Ok(())
735    }
736
737    // --- NEW Git Tests ---
738
/// With `include_untracked` off and no exclude patterns, `list_files_git` is
/// expected to return only `.gitignore`, `file2.rs` and `subdir/another.txt`
/// (the files the fixture repo adds and commits, per the expectation below).
#[test]
fn test_list_files_git_tracked_only() -> Result<()> {
    let (_fixture, root) = setup_test_dir()?;

    // A `false` result from the helper means git is not available, so the
    // test is skipped by returning success early.
    let git_ready = setup_git_repo(&root)?;
    if !git_ready {
        return Ok(());
    }

    let cfg = GrabConfig {
        // Not directly used by list_files_git, but the struct needs a value.
        target_path: root.clone(),
        add_headers: false,
        exclude_patterns: Vec::new(),
        // Default mode: tracked files only.
        include_untracked: false,
    };

    let found = list_files_git(&root, &cfg)?;

    let tracked = [".gitignore", "file2.rs", "subdir/another.txt"];
    let expected_set = get_expected_set(&root, &tracked);

    println!("Git tracked files found: {:?}", found);
    assert_paths_eq(found, expected_set);
    Ok(())
}
763
/// With `include_untracked` switched on, `list_files_git` is expected to add
/// `untracked.txt` to the tracked files, while files matched by the fixture's
/// `.gitignore` (file1.txt, binary.dat, *.log) must stay out of the result.
#[test]
fn test_list_files_git_include_untracked() -> Result<()> {
    let (_fixture, root) = setup_test_dir()?;

    // Skip (pass trivially) when the helper reports git is not available.
    let git_ready = setup_git_repo(&root)?;
    if !git_ready {
        return Ok(());
    }

    let cfg = GrabConfig {
        target_path: root.clone(),
        add_headers: false,
        exclude_patterns: Vec::new(),
        // The flag under test.
        include_untracked: true,
    };

    let found = list_files_git(&root, &cfg)?;

    // Tracked files first, then the single untracked file.
    let tracked_and_untracked = [
        ".gitignore",
        "file2.rs",
        "subdir/another.txt",
        "untracked.txt",
    ];
    let expected_set = get_expected_set(&root, &tracked_and_untracked);

    println!("Git tracked+untracked files found: {:?}", found);
    assert_paths_eq(found, expected_set);
    Ok(())
}
796
/// Tracked-only listing combined with exclude patterns: with `*.rs` and
/// `subdir/` excluded, `.gitignore` is the only path expected back from
/// `list_files_git`.
#[test]
fn test_list_files_git_with_exclude() -> Result<()> {
    let (_fixture, root) = setup_test_dir()?;

    // Skip (pass trivially) when the helper reports git is not available.
    let git_ready = setup_git_repo(&root)?;
    if !git_ready {
        return Ok(());
    }

    // Drop Rust sources and everything beneath subdir/.
    let excludes: Vec<String> = ["*.rs", "subdir/"]
        .iter()
        .map(|pattern| String::from(*pattern))
        .collect();
    let cfg = GrabConfig {
        target_path: root.clone(),
        add_headers: false,
        exclude_patterns: excludes,
        // Tracked files only.
        include_untracked: false,
    };

    let found = list_files_git(&root, &cfg)?;

    // file2.rs and subdir/another.txt are filtered out by the patterns above.
    let remaining = [".gitignore"];
    let expected_set = get_expected_set(&root, &remaining);

    println!("Git tracked files (with exclude) found: {:?}", found);
    assert_paths_eq(found, expected_set);
    Ok(())
}
821
/// Untracked files are requested but every `*.txt` path is excluded, so
/// `list_files_git` is expected to return only `.gitignore` and `file2.rs`;
/// both `subdir/another.txt` and `untracked.txt` must be filtered out.
#[test]
fn test_list_files_git_untracked_with_exclude() -> Result<()> {
    let (_fixture, root) = setup_test_dir()?;

    // Skip (pass trivially) when the helper reports git is not available.
    let git_ready = setup_git_repo(&root)?;
    if !git_ready {
        return Ok(());
    }

    let cfg = GrabConfig {
        target_path: root.clone(),
        add_headers: false,
        // Exclude every .txt file.
        exclude_patterns: vec![String::from("*.txt")],
        // Untracked files are included before the exclusion applies.
        include_untracked: true,
    };

    let found = list_files_git(&root, &cfg)?;

    let remaining = [".gitignore", "file2.rs"];
    let expected_set = get_expected_set(&root, &remaining);

    println!(
        "Git tracked+untracked (with exclude) files found: {:?}",
        found
    );
    assert_paths_eq(found, expected_set);
    Ok(())
}
850
851    // --- End of NEW Git Tests ---
852
/// Feeds two text files and `binary.dat` to `process_files` with headers
/// disabled and no repo root. The expected output contains only the two text
/// files' contents, i.e. nothing from `binary.dat`.
#[test]
fn test_process_files_no_headers_skip_binary() -> Result<()> {
    let (_fixture, root) = setup_test_dir()?;

    // Order matters: the expected string lists file1.txt before file2.rs.
    let inputs: Vec<_> = ["file1.txt", "binary.dat", "file2.rs"]
        .iter()
        .map(|name| root.join(name))
        .collect();

    let output = process_files(&inputs, false, None)?;

    // Both sides are trimmed, so leading/trailing whitespace is not compared.
    let wanted = "Content of file 1.\n\nfn main() {}\n\n";
    assert_eq!(output.trim(), wanted.trim());

    Ok(())
}
870
/// Runs `process_files` with headers enabled and the fixture directory passed
/// as the repo root. Each file's content is expected to follow a
/// `--- FILE: <name> ---` line that shows the bare relative file name.
#[test]
fn test_process_files_with_headers() -> Result<()> {
    let (_fixture, root) = setup_test_dir()?;
    let inputs = vec![root.join("file1.txt"), root.join("file2.rs")];

    let output = process_files(&inputs, true, Some(root.as_path()))?;

    // Going through `Path::display` keeps the expected names rendered the
    // same way on every OS.
    let first_name = Path::new("file1.txt").display();
    let second_name = Path::new("file2.rs").display();
    let wanted = format!(
        "--- FILE: {} ---\nContent of file 1.\n\n--- FILE: {} ---\nfn main() {{}}\n\n",
        first_name, second_name
    );

    // Both sides are trimmed, so leading/trailing whitespace is not compared.
    assert_eq!(output.trim(), wanted.trim());

    Ok(())
}
890} // End of mod tests