Skip to main content

dirgrab_lib/
listing.rs

1// --- FILE: dirgrab-lib/src/listing.rs ---
2
3use std::collections::HashSet; // Needed for list_files_git
4use std::io; // Needed for io::ErrorKind::NotFound check indirectly via run_command/detect_git_repo
5use std::path::{Path, PathBuf};
6
7use ignore::gitignore::GitignoreBuilder;
8use ignore::Match;
9use log::{debug, error, info, warn};
10use walkdir::WalkDir;
11
12// Use crate:: paths for sibling modules
13use crate::config::GrabConfig;
14use crate::errors::{GrabError, GrabResult};
15use crate::utils::run_command; // Use the utility function
16
17/// Checks if the path is inside a Git repository and returns the repo root if true.
18/// Crate-public as it's only called by grab_contents in lib.rs.
19pub(crate) fn detect_git_repo(path: &Path) -> GrabResult<Option<PathBuf>> {
20    let command_str = "git rev-parse --show-toplevel";
21    debug!(
22        "Detecting git repo by running '{}' in path: {:?}",
23        command_str, path
24    );
25
26    // Attempt to run git command, handle specific "not found" error gracefully
27    let output = match run_command("git", &["rev-parse", "--show-toplevel"], path) {
28        // Uses run_command
29        Ok(output) => output,
30        Err(GrabError::GitExecutionError { ref source, .. })
31            if source.kind() == io::ErrorKind::NotFound =>
32        {
33            info!("'git' command not found. Assuming Non-Git mode.");
34            return Ok(None);
35        }
36        Err(e) => return Err(e),
37    };
38
39    if output.status.success() {
40        let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
41        if !stdout.is_empty() {
42            let root_path_raw = PathBuf::from(&stdout);
43            let root_path = root_path_raw
44                .canonicalize()
45                .map_err(|e| GrabError::IoError {
46                    path: root_path_raw.clone(),
47                    source: e,
48                })?;
49            debug!("Detected Git repo root: {:?}", root_path);
50            Ok(Some(root_path))
51        } else {
52            warn!(
53                "'{}' succeeded but returned empty output in {:?}. Treating as Non-Git mode.",
54                command_str, path
55            );
56            Ok(None)
57        }
58    } else {
59        let stderr = String::from_utf8_lossy(&output.stderr);
60        if stderr.contains("fatal: detected dubious ownership in repository at") {
61            warn!(
62                "Git reports 'dubious ownership' for {:?}. Falling back to non-git mode. Consider running: git config --global --add safe.directory {:?}",
63                path, path
64            );
65            Ok(None)
66        } else if stderr.contains("not a git repository") {
67            debug!(
68                "Path is not inside a Git repository (based on stderr): {:?}",
69                path
70            );
71            Ok(None)
72        } else {
73            let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
74            error!(
75                "Git command '{}' failed unexpectedly.\nStderr: {}\\nStdout: {}",
76                command_str, stderr, stdout
77            );
78            Err(GrabError::GitCommandError {
79                command: command_str.to_string(),
80                stderr: stderr.into_owned(),
81                stdout,
82            })
83        }
84    }
85}
86
87/// Lists files using `git ls-files`. Handles tracked and optionally untracked files.
88/// Crate-public as it's only called by grab_contents in lib.rs.
89pub(crate) fn list_files_git(
90    repo_root: &Path,
91    config: &GrabConfig,
92    scope_subdir: Option<&Path>,
93) -> GrabResult<Vec<PathBuf>> {
94    debug!(
95        "Listing files using Git in root {:?} with scope {:?}",
96        repo_root, scope_subdir
97    );
98
99    let mut combined_files = HashSet::new();
100
101    let scope_specs = build_scope_pathspecs(repo_root, scope_subdir);
102    let exclude_specs = build_exclude_pathspecs(config);
103
104    let mut tracked_args = vec!["ls-files".to_string(), "-z".to_string()];
105    tracked_args.extend(scope_specs.iter().cloned());
106    tracked_args.extend(exclude_specs.iter().cloned());
107
108    run_git_ls(repo_root, &tracked_args, "tracked", &mut combined_files)?;
109
110    if config.include_untracked {
111        let mut untracked_args = vec![
112            "ls-files".to_string(),
113            "-z".to_string(),
114            "--others".to_string(),
115            "--exclude-standard".to_string(),
116        ];
117        untracked_args.extend(scope_specs.iter().cloned());
118        untracked_args.extend(exclude_specs.iter().cloned());
119
120        run_git_ls(repo_root, &untracked_args, "untracked", &mut combined_files)?;
121    } else {
122        debug!("Skipping untracked files per configuration.");
123    }
124
125    let mut files: Vec<PathBuf> = combined_files.into_iter().collect();
126    files.sort();
127    Ok(files)
128}
129
130/// Lists files using `walkdir` when not in a Git repository. Applies command-line excludes.
131/// Crate-public as it's only called by grab_contents in lib.rs.
132pub(crate) fn list_files_walkdir(
133    target_path: &Path,
134    config: &GrabConfig,
135) -> GrabResult<Vec<PathBuf>> {
136    debug!("Listing files using walkdir starting at: {:?}", target_path);
137    let mut files = Vec::new();
138
139    let mut exclude_builder = GitignoreBuilder::new(target_path);
140
141    // Add default exclusions for dirgrab.txt (conditionally) and .git/
142    if !config.include_default_output {
143        let pattern = normalize_glob("dirgrab.txt");
144        if let Err(e) = exclude_builder.add_line(None, &pattern) {
145            warn!("Failed to add default exclusion pattern 'dirgrab.txt': {}. This exclusion might not apply.", e);
146        } else {
147            debug!("Applying default exclusion for 'dirgrab.txt'");
148        }
149    } else {
150        info!("Default exclusion for 'dirgrab.txt' is disabled by --include-default-output flag.");
151    }
152    // Always exclude the .git directory when using walkdir
153    let git_dir_pattern = normalize_glob(".git/");
154    if let Err(e) = exclude_builder.add_line(None, &git_dir_pattern) {
155        warn!(
156            "Failed to add default exclusion pattern '.git/': {}. Git directory might be included.",
157            e
158        );
159    } else {
160        debug!("Applying default exclusion for '.git/'");
161    }
162
163    // Add user-provided exclusion patterns
164    for pattern in &config.exclude_patterns {
165        let normalized = normalize_glob(pattern);
166        if let Err(e) = exclude_builder.add_line(None, &normalized) {
167            error!(
168                "Failed to add exclude pattern '{}': {}. This pattern will be ignored.",
169                pattern, e
170            );
171        }
172    }
173    let exclude_matcher = exclude_builder
174        .build()
175        .map_err(GrabError::GlobMatcherBuildError)?;
176
177    // Canonicalize the target to use as a boundary check for symlinks.
178    let canonical_root = target_path
179        .canonicalize()
180        .unwrap_or_else(|_| target_path.to_path_buf());
181
182    // Walk directory while pruning ignored subtrees early.
183    // follow_links(true) matches Git mode behavior where symlinked files are included.
184    // Walkdir detects circular symlinks and emits errors, which we handle below.
185    let mut walker = WalkDir::new(target_path).follow_links(true).into_iter();
186    while let Some(entry_result) = walker.next() {
187        let entry = match entry_result {
188            Ok(entry) => entry,
189            Err(e) => {
190                let path_display = e.path().map_or_else(
191                    || target_path.display().to_string(),
192                    |p| p.display().to_string(),
193                );
194                warn!(
195                    "Skipping path due to error during walk near {}: {}",
196                    path_display, e
197                );
198                continue;
199            }
200        };
201
202        let path = entry.path();
203
204        // Boundary check: if a symlink resolves outside the target directory, skip it.
205        if entry.path_is_symlink() {
206            if let Ok(canonical) = path.canonicalize() {
207                if !canonical.starts_with(&canonical_root) {
208                    debug!(
209                        "Skipping symlink that escapes target directory: {:?} -> {:?}",
210                        path, canonical
211                    );
212                    if entry.file_type().is_dir() {
213                        walker.skip_current_dir();
214                    }
215                    continue;
216                }
217            }
218        }
219
220        if entry.file_type().is_dir() {
221            if matches!(
222                exclude_matcher.matched_path_or_any_parents(path, true),
223                Match::Ignore(_)
224            ) {
225                debug!(
226                    "Pruning directory due to pattern match on path or parent (walkdir): {:?}",
227                    path
228                );
229                walker.skip_current_dir();
230            }
231            continue;
232        }
233
234        if !entry.file_type().is_file() {
235            continue;
236        }
237
238        match exclude_matcher.matched_path_or_any_parents(path, false) {
239            Match::None | Match::Whitelist(_) => {
240                files.push(path.to_path_buf());
241            }
242            Match::Ignore(_) => {
243                debug!(
244                    "Excluding file due to pattern match on path or parent (walkdir): {:?}",
245                    path
246                );
247            }
248        }
249    }
250
251    files.sort();
252    Ok(files)
253}
254
255fn run_git_ls(
256    repo_root: &Path,
257    args: &[String],
258    phase: &str,
259    combined_files: &mut HashSet<PathBuf>,
260) -> GrabResult<()> {
261    let display_command = format!("git {}", args.join(" "));
262    debug!(
263        "Running git command for {} files: {}",
264        phase, display_command
265    );
266
267    let arg_refs: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
268    let output = run_command("git", &arg_refs, repo_root)?;
269    if !output.status.success() {
270        let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
271        let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
272        error!(
273            "git ls-files command ({}) failed.\nStderr: {}\nStdout: {}",
274            phase, stderr, stdout
275        );
276        return Err(GrabError::GitCommandError {
277            command: display_command,
278            stderr,
279            stdout,
280        });
281    }
282
283    for path in String::from_utf8_lossy(&output.stdout)
284        .split('\0')
285        .filter(|s| !s.is_empty())
286    {
287        combined_files.insert(repo_root.join(path));
288    }
289
290    Ok(())
291}
292
293fn build_scope_pathspecs(repo_root: &Path, scope_subdir: Option<&Path>) -> Vec<String> {
294    let mut specs = Vec::new();
295    if let Some(rel_path) = scope_subdir {
296        if rel_path.as_os_str().is_empty() {
297            return specs;
298        }
299
300        let absolute_path = repo_root.join(rel_path);
301        let normalized = normalize_for_git(rel_path);
302        if absolute_path.is_dir() {
303            let suffix = if normalized.ends_with('/') {
304                "**"
305            } else {
306                "/**"
307            };
308            let spec = format!(":(glob){}{}", normalized, suffix);
309            specs.push(spec);
310        } else {
311            specs.push(format!(":(glob){}", normalized));
312        }
313    }
314    specs
315}
316
317fn build_exclude_pathspecs(config: &GrabConfig) -> Vec<String> {
318    let mut specs = Vec::new();
319    let mut seen = HashSet::new();
320
321    if !config.include_default_output {
322        let normalized = normalize_glob("dirgrab.txt");
323        if seen.insert(normalized.clone()) {
324            debug!("Applying default exclusion for 'dirgrab.txt'");
325            specs.push(format!(":(glob,exclude){}", prefix_for_git(&normalized)));
326        }
327    } else {
328        info!("Default exclusion for 'dirgrab.txt' is disabled by configuration.");
329    }
330
331    for pattern in &config.exclude_patterns {
332        let normalized = normalize_glob(pattern);
333        if seen.insert(normalized.clone()) {
334            specs.push(format!(":(glob,exclude){}", prefix_for_git(&normalized)));
335        } else {
336            debug!(
337                "Skipping duplicate exclude pattern '{}' when building git pathspecs",
338                pattern
339            );
340        }
341    }
342
343    specs
344}
345
/// Renders `path` as a string whose components are separated by `/`.
///
/// Components come from `Path::components`; each one is converted lossily to
/// UTF-8 before being joined.
fn normalize_for_git(path: &Path) -> String {
    let mut joined = String::new();
    for (index, component) in path.components().enumerate() {
        if index > 0 {
            joined.push('/');
        }
        joined.push_str(&component.as_os_str().to_string_lossy());
    }
    joined
}
352
/// Returns `pattern` unchanged if it contains a `/`; otherwise returns it
/// with `**/` prepended.
fn prefix_for_git(pattern: &str) -> String {
    let mut spec = String::with_capacity(pattern.len() + 3);
    if !pattern.contains('/') {
        spec.push_str("**/");
    }
    spec.push_str(pattern);
    spec
}
360
/// Returns a copy of `pattern` with every backslash replaced by a forward
/// slash, so that patterns match consistently across platforms.
pub fn normalize_glob(pattern: &str) -> String {
    pattern
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}