1use std::collections::HashSet; use std::io; use std::path::{Path, PathBuf};
6
7use ignore::gitignore::GitignoreBuilder;
8use ignore::Match;
9use log::{debug, error, info, warn};
10use walkdir::WalkDir;
11
12use crate::config::GrabConfig;
14use crate::errors::{GrabError, GrabResult};
15use crate::utils::run_command; pub(crate) fn detect_git_repo(path: &Path) -> GrabResult<Option<PathBuf>> {
20 let command_str = "git rev-parse --show-toplevel";
21 debug!(
22 "Detecting git repo by running '{}' in path: {:?}",
23 command_str, path
24 );
25
26 let output = match run_command("git", &["rev-parse", "--show-toplevel"], path) {
28 Ok(output) => output,
30 Err(GrabError::GitExecutionError { ref source, .. })
31 if source.kind() == io::ErrorKind::NotFound =>
32 {
33 info!("'git' command not found. Assuming Non-Git mode.");
34 return Ok(None);
35 }
36 Err(e) => return Err(e),
37 };
38
39 if output.status.success() {
40 let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
41 if !stdout.is_empty() {
42 let root_path_raw = PathBuf::from(&stdout);
43 let root_path = root_path_raw
44 .canonicalize()
45 .map_err(|e| GrabError::IoError {
46 path: root_path_raw.clone(),
47 source: e,
48 })?;
49 debug!("Detected Git repo root: {:?}", root_path);
50 Ok(Some(root_path))
51 } else {
52 warn!(
53 "'{}' succeeded but returned empty output in {:?}. Treating as Non-Git mode.",
54 command_str, path
55 );
56 Ok(None)
57 }
58 } else {
59 let stderr = String::from_utf8_lossy(&output.stderr);
60 if stderr.contains("fatal: detected dubious ownership in repository at") {
61 warn!(
62 "Git reports 'dubious ownership' for {:?}. Falling back to non-git mode. Consider running: git config --global --add safe.directory {:?}",
63 path, path
64 );
65 Ok(None)
66 } else if stderr.contains("not a git repository") {
67 debug!(
68 "Path is not inside a Git repository (based on stderr): {:?}",
69 path
70 );
71 Ok(None)
72 } else {
73 let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
74 error!(
75 "Git command '{}' failed unexpectedly.\nStderr: {}\\nStdout: {}",
76 command_str, stderr, stdout
77 );
78 Err(GrabError::GitCommandError {
79 command: command_str.to_string(),
80 stderr: stderr.into_owned(),
81 stdout,
82 })
83 }
84 }
85}
86
87pub(crate) fn list_files_git(
90 repo_root: &Path,
91 config: &GrabConfig,
92 scope_subdir: Option<&Path>,
93) -> GrabResult<Vec<PathBuf>> {
94 debug!(
95 "Listing files using Git in root {:?} with scope {:?}",
96 repo_root, scope_subdir
97 );
98
99 let mut combined_files = HashSet::new();
100
101 let scope_specs = build_scope_pathspecs(repo_root, scope_subdir);
102 let exclude_specs = build_exclude_pathspecs(config);
103
104 let mut tracked_args = vec!["ls-files".to_string(), "-z".to_string()];
105 tracked_args.extend(scope_specs.iter().cloned());
106 tracked_args.extend(exclude_specs.iter().cloned());
107
108 run_git_ls(repo_root, &tracked_args, "tracked", &mut combined_files)?;
109
110 if config.include_untracked {
111 let mut untracked_args = vec![
112 "ls-files".to_string(),
113 "-z".to_string(),
114 "--others".to_string(),
115 "--exclude-standard".to_string(),
116 ];
117 untracked_args.extend(scope_specs.iter().cloned());
118 untracked_args.extend(exclude_specs.iter().cloned());
119
120 run_git_ls(repo_root, &untracked_args, "untracked", &mut combined_files)?;
121 } else {
122 debug!("Skipping untracked files per configuration.");
123 }
124
125 let mut files: Vec<PathBuf> = combined_files.into_iter().collect();
126 files.sort();
127 Ok(files)
128}
129
130pub(crate) fn list_files_walkdir(
133 target_path: &Path,
134 config: &GrabConfig,
135) -> GrabResult<Vec<PathBuf>> {
136 debug!("Listing files using walkdir starting at: {:?}", target_path);
137 let mut files = Vec::new();
138
139 let mut exclude_builder = GitignoreBuilder::new(target_path);
140
141 if !config.include_default_output {
143 let pattern = normalize_glob("dirgrab.txt");
144 if let Err(e) = exclude_builder.add_line(None, &pattern) {
145 warn!("Failed to add default exclusion pattern 'dirgrab.txt': {}. This exclusion might not apply.", e);
146 } else {
147 debug!("Applying default exclusion for 'dirgrab.txt'");
148 }
149 } else {
150 info!("Default exclusion for 'dirgrab.txt' is disabled by --include-default-output flag.");
151 }
152 let git_dir_pattern = normalize_glob(".git/");
154 if let Err(e) = exclude_builder.add_line(None, &git_dir_pattern) {
155 warn!(
156 "Failed to add default exclusion pattern '.git/': {}. Git directory might be included.",
157 e
158 );
159 } else {
160 debug!("Applying default exclusion for '.git/'");
161 }
162
163 for pattern in &config.exclude_patterns {
165 let normalized = normalize_glob(pattern);
166 if let Err(e) = exclude_builder.add_line(None, &normalized) {
167 error!(
168 "Failed to add exclude pattern '{}': {}. This pattern will be ignored.",
169 pattern, e
170 );
171 }
172 }
173 let exclude_matcher = exclude_builder
174 .build()
175 .map_err(GrabError::GlobMatcherBuildError)?;
176
177 let canonical_root = target_path
179 .canonicalize()
180 .unwrap_or_else(|_| target_path.to_path_buf());
181
182 let mut walker = WalkDir::new(target_path).follow_links(true).into_iter();
186 while let Some(entry_result) = walker.next() {
187 let entry = match entry_result {
188 Ok(entry) => entry,
189 Err(e) => {
190 let path_display = e.path().map_or_else(
191 || target_path.display().to_string(),
192 |p| p.display().to_string(),
193 );
194 warn!(
195 "Skipping path due to error during walk near {}: {}",
196 path_display, e
197 );
198 continue;
199 }
200 };
201
202 let path = entry.path();
203
204 if entry.path_is_symlink() {
206 if let Ok(canonical) = path.canonicalize() {
207 if !canonical.starts_with(&canonical_root) {
208 debug!(
209 "Skipping symlink that escapes target directory: {:?} -> {:?}",
210 path, canonical
211 );
212 if entry.file_type().is_dir() {
213 walker.skip_current_dir();
214 }
215 continue;
216 }
217 }
218 }
219
220 if entry.file_type().is_dir() {
221 if matches!(
222 exclude_matcher.matched_path_or_any_parents(path, true),
223 Match::Ignore(_)
224 ) {
225 debug!(
226 "Pruning directory due to pattern match on path or parent (walkdir): {:?}",
227 path
228 );
229 walker.skip_current_dir();
230 }
231 continue;
232 }
233
234 if !entry.file_type().is_file() {
235 continue;
236 }
237
238 match exclude_matcher.matched_path_or_any_parents(path, false) {
239 Match::None | Match::Whitelist(_) => {
240 files.push(path.to_path_buf());
241 }
242 Match::Ignore(_) => {
243 debug!(
244 "Excluding file due to pattern match on path or parent (walkdir): {:?}",
245 path
246 );
247 }
248 }
249 }
250
251 files.sort();
252 Ok(files)
253}
254
255fn run_git_ls(
256 repo_root: &Path,
257 args: &[String],
258 phase: &str,
259 combined_files: &mut HashSet<PathBuf>,
260) -> GrabResult<()> {
261 let display_command = format!("git {}", args.join(" "));
262 debug!(
263 "Running git command for {} files: {}",
264 phase, display_command
265 );
266
267 let arg_refs: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
268 let output = run_command("git", &arg_refs, repo_root)?;
269 if !output.status.success() {
270 let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
271 let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
272 error!(
273 "git ls-files command ({}) failed.\nStderr: {}\nStdout: {}",
274 phase, stderr, stdout
275 );
276 return Err(GrabError::GitCommandError {
277 command: display_command,
278 stderr,
279 stdout,
280 });
281 }
282
283 for path in String::from_utf8_lossy(&output.stdout)
284 .split('\0')
285 .filter(|s| !s.is_empty())
286 {
287 combined_files.insert(repo_root.join(path));
288 }
289
290 Ok(())
291}
292
293fn build_scope_pathspecs(repo_root: &Path, scope_subdir: Option<&Path>) -> Vec<String> {
294 let mut specs = Vec::new();
295 if let Some(rel_path) = scope_subdir {
296 if rel_path.as_os_str().is_empty() {
297 return specs;
298 }
299
300 let absolute_path = repo_root.join(rel_path);
301 let normalized = normalize_for_git(rel_path);
302 if absolute_path.is_dir() {
303 let suffix = if normalized.ends_with('/') {
304 "**"
305 } else {
306 "/**"
307 };
308 let spec = format!(":(glob){}{}", normalized, suffix);
309 specs.push(spec);
310 } else {
311 specs.push(format!(":(glob){}", normalized));
312 }
313 }
314 specs
315}
316
317fn build_exclude_pathspecs(config: &GrabConfig) -> Vec<String> {
318 let mut specs = Vec::new();
319 let mut seen = HashSet::new();
320
321 if !config.include_default_output {
322 let normalized = normalize_glob("dirgrab.txt");
323 if seen.insert(normalized.clone()) {
324 debug!("Applying default exclusion for 'dirgrab.txt'");
325 specs.push(format!(":(glob,exclude){}", prefix_for_git(&normalized)));
326 }
327 } else {
328 info!("Default exclusion for 'dirgrab.txt' is disabled by configuration.");
329 }
330
331 for pattern in &config.exclude_patterns {
332 let normalized = normalize_glob(pattern);
333 if seen.insert(normalized.clone()) {
334 specs.push(format!(":(glob,exclude){}", prefix_for_git(&normalized)));
335 } else {
336 debug!(
337 "Skipping duplicate exclude pattern '{}' when building git pathspecs",
338 pattern
339 );
340 }
341 }
342
343 specs
344}
345
/// Renders `path` as a string whose components are separated by `/`.
///
/// Each path component is converted lossily to UTF-8 before being joined.
fn normalize_for_git(path: &Path) -> String {
    let mut joined = String::new();
    for (index, component) in path.components().enumerate() {
        if index > 0 {
            joined.push('/');
        }
        joined.push_str(&component.as_os_str().to_string_lossy());
    }
    joined
}
352
/// Prepares a glob pattern for use in a Git pathspec.
///
/// A pattern that contains no `/` is prefixed with `**/`; a pattern that
/// already contains a `/` is returned unchanged.
fn prefix_for_git(pattern: &str) -> String {
    match pattern.find('/') {
        Some(_) => pattern.to_owned(),
        None => ["**/", pattern].concat(),
    }
}
360
/// Returns a copy of `pattern` with every backslash replaced by a forward
/// slash.
pub fn normalize_glob(pattern: &str) -> String {
    pattern
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}