Skip to main content

dirgrab_lib/
lib.rs

1#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
2
3// Declare modules
4mod config;
5mod errors;
6mod listing;
7mod processing;
8mod tree;
9mod utils;
10
11// Necessary imports for lib.rs itself
12use log::{debug, error, info, warn};
13use std::io; // For io::ErrorKind // For logging within grab_contents
14use std::ops::Range;
15use std::path::{Path, PathBuf};
16
17// Re-export public API components
18pub use config::GrabConfig;
19pub use errors::{GrabError, GrabResult};
20pub use listing::normalize_glob;
21
/// Describes where one file's text sits inside [`GrabOutput::content`].
///
/// The ranges are offsets into the final `content` string; `grab_contents_detailed`
/// shifts them by the length of anything (such as the directory tree) emitted
/// before the file contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GrabbedFile {
    /// Path used to identify the file in the output.
    pub display_path: String,
    /// Range of the file's whole entry within `content`.
    pub full_range: Range<usize>,
    /// Range of the file's header within `content`, if a header range was recorded.
    pub header_range: Option<Range<usize>>,
    /// Range of the file's body within `content`.
    pub body_range: Range<usize>,
}

/// Result of a detailed grab: the combined text plus per-file location metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GrabOutput {
    /// The complete generated output.
    pub content: String,
    /// One entry per file segment reported by the processing step.
    pub files: Vec<GrabbedFile>,
}
35
36// --- Internal helpers ---
37
38/// Shared file-discovery logic: canonicalizes target, detects git repo,
39/// lists files. Returns (absolute file paths, repo root if any, canonical target).
40fn discover_files(config: &GrabConfig) -> GrabResult<(Vec<PathBuf>, Option<PathBuf>, PathBuf)> {
41    let target_path = config.target_path.canonicalize().map_err(|e| {
42        if e.kind() == io::ErrorKind::NotFound {
43            GrabError::TargetPathNotFound(config.target_path.clone())
44        } else {
45            GrabError::IoError {
46                path: config.target_path.clone(),
47                source: e,
48            }
49        }
50    })?;
51    debug!("Canonical target path: {:?}", target_path);
52
53    let (files, maybe_repo_root) = if config.no_git {
54        info!("Ignoring Git context due to --no-git flag.");
55        let files = listing::list_files_walkdir(&target_path, config)?;
56        (files, None)
57    } else {
58        let git_repo_root = listing::detect_git_repo(&target_path)?;
59        let scope_subdir = git_repo_root
60            .as_ref()
61            .and_then(|root| derive_scope_subdir(root, &target_path, config));
62
63        let files = match &git_repo_root {
64            Some(root) => {
65                info!("Operating in Git mode. Repo root: {:?}", root);
66                if let Some(scope) = scope_subdir.as_deref() {
67                    info!("Limiting Git file listing to sub-path: {:?}", scope);
68                } else if !config.all_repo {
69                    debug!(
70                        "Scope calculation yielded full repository; processing entire repo contents."
71                    );
72                }
73                listing::list_files_git(root, config, scope_subdir.as_deref())?
74            }
75            None => {
76                info!("Operating in Non-Git mode. Target path: {:?}", target_path);
77                listing::list_files_walkdir(&target_path, config)?
78            }
79        };
80        (files, git_repo_root)
81    };
82
83    info!("Found {} files.", files.len());
84    Ok((files, maybe_repo_root, target_path))
85}
86
/// Builds the path string shown to the user for `file_path`.
///
/// The path is made relative to `repo_root` when one is given, otherwise to
/// `target_path`. If `file_path` is not located under that base it is used as-is.
/// On platforms whose separator is `\`, backslashes are rewritten to `/`.
fn display_path(file_path: &Path, repo_root: Option<&Path>, target_path: &Path) -> String {
    let base = match repo_root {
        Some(root) => root,
        None => target_path,
    };
    let shown = match file_path.strip_prefix(base) {
        Ok(relative) => relative,
        Err(_) => file_path,
    };
    let text = shown.to_string_lossy();

    let needs_normalizing = std::path::MAIN_SEPARATOR == '\\' && text.contains('\\');
    if !needs_normalizing {
        return text.into_owned();
    }
    text.replace('\\', "/")
}
98
99// --- Main Public Functions ---
100
101/// Lists the files that would be included by `dirgrab` without reading their contents.
102/// Returns display paths (relative to repo root in Git mode, or target path otherwise).
103pub fn list_files(config: &GrabConfig) -> GrabResult<Vec<String>> {
104    info!("Listing files with config: {:?}", config);
105    let (files, maybe_repo_root, target_path) = discover_files(config)?;
106    Ok(files
107        .iter()
108        .map(|f| display_path(f, maybe_repo_root.as_deref(), &target_path))
109        .collect())
110}
111
112/// Performs the main `dirgrab` operation based on the provided configuration.
113pub fn grab_contents(config: &GrabConfig) -> GrabResult<String> {
114    grab_contents_detailed(config).map(|output| output.content)
115}
116
117/// Performs the main `dirgrab` operation and returns file-level metadata along with the content.
118pub fn grab_contents_detailed(config: &GrabConfig) -> GrabResult<GrabOutput> {
119    info!("Starting dirgrab operation with config: {:?}", config);
120
121    let (files_to_process, maybe_repo_root, target_path) = discover_files(config)?;
122
123    // Initialize output buffer
124    let mut output_buffer = String::new();
125    let mut file_segments = Vec::new();
126
127    // Generate and prepend tree if requested
128    if config.include_tree {
129        if files_to_process.is_empty() {
130            warn!("--include-tree specified, but no files were selected for processing. Tree will be empty.");
131            // Keep explicit tree header even if empty
132            output_buffer.push_str("---\nDIRECTORY STRUCTURE (No files selected)\n---\n\n");
133            return Ok(GrabOutput {
134                content: output_buffer,
135                files: Vec::new(),
136            });
137        } else {
138            // Determine base path for tree (repo root if git mode, target path otherwise)
139            let base_path_for_tree = if !config.no_git && maybe_repo_root.is_some() {
140                maybe_repo_root.as_deref().unwrap() // Safe unwrap due to is_some() check
141            } else {
142                &target_path
143            };
144            debug!(
145                "Generating directory tree relative to: {:?}",
146                base_path_for_tree
147            );
148
149            match tree::generate_indented_tree(&files_to_process, base_path_for_tree) {
150                Ok(tree_str) => {
151                    output_buffer.push_str("---\nDIRECTORY STRUCTURE\n---\n");
152                    output_buffer.push_str(&tree_str);
153                    output_buffer.push_str("\n---\nFILE CONTENTS\n---\n\n");
154                }
155                Err(e) => {
156                    error!("Failed to generate directory tree: {}", e);
157                    // Still add header indicating failure
158                    output_buffer.push_str("---\nERROR GENERATING DIRECTORY STRUCTURE\n---\n\n");
159                }
160            }
161        }
162    }
163
164    // Process files and append content (only if files exist)
165    if !files_to_process.is_empty() {
166        // Updated call to process_files to pass the whole config struct
167        let processed = processing::process_files(
168            &files_to_process,
169            config, // Pass config struct
170            maybe_repo_root.as_deref(),
171            &target_path,
172        )?;
173        let base_offset = output_buffer.len();
174        output_buffer.push_str(&processed.content);
175        for segment in processed.files {
176            file_segments.push(GrabbedFile {
177                display_path: segment.display_path,
178                full_range: offset_range(&segment.full_range, base_offset),
179                header_range: segment
180                    .header_range
181                    .map(|range| offset_range(&range, base_offset)),
182                body_range: offset_range(&segment.body_range, base_offset),
183            });
184        }
185    } else if !config.include_tree {
186        // If no files AND no tree was requested
187        warn!("No files selected for processing based on current configuration.");
188        // Return empty string only if no files were found AND tree wasn't requested/generated.
189        return Ok(GrabOutput {
190            content: String::new(),
191            files: Vec::new(),
192        });
193    }
194
195    // Return the combined buffer (might contain only tree, or tree + content, or just content)
196    Ok(GrabOutput {
197        content: output_buffer,
198        files: file_segments,
199    })
200}
201
202fn derive_scope_subdir(
203    repo_root: &Path,
204    target_path: &Path,
205    config: &GrabConfig,
206) -> Option<PathBuf> {
207    if config.all_repo {
208        return None;
209    }
210
211    match target_path.strip_prefix(repo_root) {
212        Ok(rel) => {
213            if rel.as_os_str().is_empty() {
214                None
215            } else {
216                Some(rel.to_path_buf())
217            }
218        }
219        Err(_) => None,
220    }
221}
222
/// Returns `range` with both of its bounds moved forward by `offset`.
fn offset_range(range: &Range<usize>, offset: usize) -> Range<usize> {
    Range {
        start: range.start + offset,
        end: range.end + offset,
    }
}
226
227// --- FILE: dirgrab-lib/src/lib.rs ---
228// (Showing only the tests module and its necessary imports)
229
230// ... (rest of lib.rs code above) ...
231
232// --- Tests ---
233#[cfg(test)]
234mod tests {
235    // Use super::* to bring everything from lib.rs into scope for tests
236    // This now includes GrabConfig, GrabError, GrabResult because they are re-exported.
237    use super::*;
238    // Also need direct imports for helpers/types used *only* in tests
239    use anyhow::{Context, Result}; // Ensure Context and Result are imported from anyhow
240    use std::collections::HashSet;
241    use std::fs::{self}; // Ensure File is imported if needed by helpers
242    use std::path::{Path, PathBuf}; // Need these for helpers defined within tests mod
243    use std::process::Command;
244    use tempfile::{tempdir, TempDir};
245
246    // --- Test Setup Helpers ---
247    fn setup_test_dir() -> Result<(TempDir, PathBuf)> {
248        let dir = tempdir()?;
249        let path = dir.path().to_path_buf();
250
251        fs::write(path.join("file1.txt"), "Content of file 1.")?;
252        fs::write(path.join("file2.rs"), "fn main() {}")?;
253        fs::create_dir_all(path.join("subdir"))?; // Use create_dir_all
254        fs::write(path.join("subdir").join("file3.log"), "Log message.")?;
255        fs::write(
256            path.join("subdir").join("another.txt"),
257            "Another text file.",
258        )?;
259        fs::write(path.join("binary.dat"), [0x80, 0x81, 0x82])?;
260        fs::write(path.join("dirgrab.txt"), "Previous dirgrab output.")?;
261        Ok((dir, path))
262    }
263
264    fn setup_git_repo(path: &Path) -> Result<bool> {
265        if Command::new("git").arg("--version").output().is_err() {
266            eprintln!("WARN: 'git' command not found, skipping Git-related test setup.");
267            return Ok(false);
268        }
269        // Use crate:: path now because utils is not in super::* scope
270        crate::utils::run_command("git", &["init", "-b", "main"], path)?;
271        crate::utils::run_command("git", &["config", "user.email", "test@example.com"], path)?;
272        crate::utils::run_command("git", &["config", "user.name", "Test User"], path)?;
273        // Configure Git to handle potential CRLF issues on Windows in tests if needed
274        crate::utils::run_command("git", &["config", "core.autocrlf", "false"], path)?;
275
276        fs::write(path.join(".gitignore"), "*.log\nbinary.dat\nfile1.txt")?;
277        crate::utils::run_command(
278            "git",
279            &["add", ".gitignore", "file2.rs", "subdir/another.txt"],
280            path,
281        )?;
282        crate::utils::run_command("git", &["commit", "-m", "Initial commit"], path)?;
283
284        fs::write(path.join("untracked.txt"), "This file is not tracked.")?;
285        fs::write(path.join("ignored.log"), "This should be ignored by git.")?;
286        fs::create_dir_all(path.join("deep/sub"))?;
287        fs::write(path.join("deep/sub/nested.txt"), "Nested content")?;
288        crate::utils::run_command("git", &["add", "deep/sub/nested.txt"], path)?;
289        crate::utils::run_command("git", &["commit", "-m", "Add nested file"], path)?;
290        Ok(true)
291    }
292
293    fn run_test_command(
294        cmd: &str,
295        args: &[&str],
296        current_dir: &Path,
297    ) -> Result<std::process::Output> {
298        let output = crate::utils::run_command(cmd, args, current_dir)?;
299        if !output.status.success() {
300            let stderr = String::from_utf8_lossy(&output.stderr);
301            let stdout = String::from_utf8_lossy(&output.stdout);
302            anyhow::bail!(
303                "Command failed: {} {:?}\nStatus: {}\nStdout: {}\nStderr: {}",
304                cmd,
305                args,
306                output.status,
307                stdout,
308                stderr
309            );
310        }
311        Ok(output)
312    }
313
314    fn get_expected_set(base_path: &Path, relative_paths: &[&str]) -> HashSet<PathBuf> {
315        relative_paths.iter().map(|p| base_path.join(p)).collect()
316    }
317
318    fn assert_paths_eq(actual: Vec<PathBuf>, expected: HashSet<PathBuf>) {
319        let actual_set: HashSet<PathBuf> = actual.into_iter().collect();
320        assert_eq!(
321            actual_set, expected,
322            "Path sets differ.\nActual paths: {:?}\nExpected paths: {:?}",
323            actual_set, expected
324        );
325    }
326
327    // --- Tests ---
328    // Tests calling listing functions need crate:: prefix
329    #[test]
330    fn test_detect_git_repo_inside() -> Result<()> {
331        let (_dir, path) = setup_test_dir()?;
332        if !setup_git_repo(&path)? {
333            println!("Skipping Git test: git not found or setup failed.");
334            return Ok(());
335        }
336        let maybe_root = crate::listing::detect_git_repo(&path)?; // Use crate:: path
337        assert!(maybe_root.is_some());
338        assert_eq!(maybe_root.unwrap().canonicalize()?, path.canonicalize()?);
339        let subdir_path = path.join("subdir");
340        let maybe_root_from_subdir = crate::listing::detect_git_repo(&subdir_path)?; // Use crate:: path
341        assert!(maybe_root_from_subdir.is_some());
342        assert_eq!(
343            maybe_root_from_subdir.unwrap().canonicalize()?,
344            path.canonicalize()?
345        );
346        Ok(())
347    }
348
349    #[test]
350    fn test_detect_git_repo_outside() -> Result<()> {
351        let (_dir, path) = setup_test_dir()?;
352        // Ensure no git repo exists here
353        let maybe_root = crate::listing::detect_git_repo(&path)?; // Use crate:: path
354        assert!(maybe_root.is_none());
355        Ok(())
356    }
357
358    #[test]
359    fn test_list_files_walkdir_no_exclude_default_excludes_dirgrab_txt() -> Result<()> {
360        let (_dir, path) = setup_test_dir()?;
361        let config = GrabConfig {
362            target_path: path.clone(),
363            add_headers: false,
364            exclude_patterns: vec![],
365            include_untracked: false,      // No effect in walkdir
366            include_default_output: false, // Exclude dirgrab.txt
367            no_git: true,                  // Force walkdir
368            include_tree: false,
369            convert_pdf: false,
370            all_repo: false,
371        };
372        let files = crate::listing::list_files_walkdir(&path, &config)?; // Use crate:: path
373        let expected_set = get_expected_set(
374            &path,
375            &[
376                "file1.txt",
377                "file2.rs",
378                "subdir/file3.log",
379                "subdir/another.txt",
380                "binary.dat",
381                // "dirgrab.txt" should be excluded by default
382            ],
383        );
384        assert_paths_eq(files, expected_set);
385        Ok(())
386    }
387
388    #[test]
389    fn test_list_files_walkdir_with_exclude() -> Result<()> {
390        let (_dir, path) = setup_test_dir()?;
391        let config = GrabConfig {
392            target_path: path.clone(),
393            add_headers: false,
394            exclude_patterns: vec!["*.log".to_string(), "subdir/".to_string()], // User excludes
395            include_untracked: false,
396            include_default_output: false,
397            no_git: true, // Force walkdir
398            include_tree: false,
399            convert_pdf: false,
400            all_repo: false,
401        };
402        let files = crate::listing::list_files_walkdir(&path, &config)?; // Use crate:: path
403        let expected_set = get_expected_set(
404            &path,
405            &[
406                "file1.txt",
407                "file2.rs",
408                "binary.dat",
409                // subdir/* excluded
410                // dirgrab.txt excluded by default
411            ],
412        );
413        assert_paths_eq(files, expected_set);
414        Ok(())
415    }
416
417    #[test]
418    fn test_list_files_git_tracked_only_default_excludes_dirgrab_txt() -> Result<()> {
419        let (_dir, path) = setup_test_dir()?;
420        if !setup_git_repo(&path)? {
421            println!("Skipping Git test: git not found or setup failed.");
422            return Ok(());
423        }
424        let config = GrabConfig {
425            target_path: path.clone(), // Target doesn't matter as much as root for list_files_git
426            add_headers: false,
427            exclude_patterns: vec![],
428            include_untracked: false,      // Tracked only
429            include_default_output: false, // Exclude dirgrab.txt
430            no_git: false,                 // Use Git
431            include_tree: false,
432            convert_pdf: false,
433            all_repo: false,
434        };
435        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path, pass repo root
436        let expected_set = get_expected_set(
437            &path,
438            &[
439                ".gitignore",
440                "file2.rs",
441                "subdir/another.txt",
442                "deep/sub/nested.txt",
443                // file1.txt ignored by .gitignore
444                // file3.log ignored by .gitignore
445                // binary.dat ignored by .gitignore
446                // dirgrab.txt not tracked and default excluded
447                // untracked.txt not tracked
448                // ignored.log not tracked
449            ],
450        );
451        assert_paths_eq(files, expected_set);
452        Ok(())
453    }
454
455    #[test]
456    fn test_list_files_git_include_untracked_default_excludes_dirgrab_txt() -> Result<()> {
457        let (_dir, path) = setup_test_dir()?;
458        if !setup_git_repo(&path)? {
459            println!("Skipping Git test: git not found or setup failed.");
460            return Ok(());
461        }
462        let config = GrabConfig {
463            target_path: path.clone(),
464            add_headers: false,
465            exclude_patterns: vec![],
466            include_untracked: true,       // Include untracked
467            include_default_output: false, // Exclude dirgrab.txt
468            no_git: false,                 // Use Git
469            include_tree: false,
470            convert_pdf: false,
471            all_repo: false,
472        };
473        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
474        let expected_set = get_expected_set(
475            &path,
476            &[
477                ".gitignore",
478                "file2.rs",
479                "subdir/another.txt",
480                "deep/sub/nested.txt",
481                "untracked.txt", // Included now
482                                 // file1.txt ignored by .gitignore
483                                 // file3.log ignored by .gitignore
484                                 // binary.dat ignored by .gitignore
485                                 // ignored.log ignored by .gitignore (via --exclude-standard)
486                                 // dirgrab.txt untracked and default excluded
487            ],
488        );
489        assert_paths_eq(files, expected_set);
490        Ok(())
491    }
492
493    #[test]
494    fn test_list_files_git_with_exclude() -> Result<()> {
495        let (_dir, path) = setup_test_dir()?;
496        if !setup_git_repo(&path)? {
497            println!("Skipping Git test: git not found or setup failed.");
498            return Ok(());
499        }
500        let config = GrabConfig {
501            target_path: path.clone(),
502            add_headers: false,
503            exclude_patterns: vec![
504                "*.rs".to_string(),    // Exclude rust files
505                "subdir/".to_string(), // Exclude subdir/
506                "deep/".to_string(),   // Exclude deep/
507            ],
508            include_untracked: false, // Tracked only
509            include_default_output: false,
510            no_git: false, // Use Git
511            include_tree: false,
512            convert_pdf: false,
513            all_repo: false,
514        };
515        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
516        let expected_set = get_expected_set(&path, &[".gitignore"]); // Only .gitignore remains
517        assert_paths_eq(files, expected_set);
518        Ok(())
519    }
520
521    #[test]
522    fn test_list_files_git_untracked_with_exclude() -> Result<()> {
523        let (_dir, path) = setup_test_dir()?;
524        if !setup_git_repo(&path)? {
525            println!("Skipping Git test: git not found or setup failed.");
526            return Ok(());
527        }
528        let config = GrabConfig {
529            target_path: path.clone(),
530            add_headers: false,
531            exclude_patterns: vec!["*.txt".to_string()], // Exclude all .txt files
532            include_untracked: true,                     // Include untracked
533            include_default_output: false,
534            no_git: false, // Use Git
535            include_tree: false,
536            convert_pdf: false,
537            all_repo: false,
538        };
539        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
540        let expected_set = get_expected_set(
541            &path,
542            &[
543                ".gitignore",
544                "file2.rs",
545                // subdir/another.txt excluded by *.txt
546                // deep/sub/nested.txt excluded by *.txt
547                // untracked.txt excluded by *.txt
548                // dirgrab.txt excluded by default
549            ],
550        );
551        assert_paths_eq(files, expected_set);
552        Ok(())
553    }
554
555    #[test]
556    fn test_list_files_walkdir_include_default_output() -> Result<()> {
557        let (_dir, path) = setup_test_dir()?;
558        let config = GrabConfig {
559            target_path: path.clone(),
560            add_headers: false,
561            exclude_patterns: vec![],
562            include_untracked: false,
563            include_default_output: true, // Include dirgrab.txt
564            no_git: true,                 // Force walkdir
565            include_tree: false,
566            convert_pdf: false,
567            all_repo: false,
568        };
569        let files = crate::listing::list_files_walkdir(&path, &config)?; // Use crate:: path
570        let expected_set = get_expected_set(
571            &path,
572            &[
573                "file1.txt",
574                "file2.rs",
575                "subdir/file3.log",
576                "subdir/another.txt",
577                "binary.dat",
578                "dirgrab.txt", // Included now
579            ],
580        );
581        assert_paths_eq(files, expected_set);
582        Ok(())
583    }
584
585    #[test]
586    fn test_list_files_git_include_default_output_tracked_only() -> Result<()> {
587        let (_dir, path) = setup_test_dir()?;
588        if !setup_git_repo(&path)? {
589            println!("Skipping Git test: git not found or setup failed.");
590            return Ok(());
591        }
592        // Make dirgrab.txt tracked
593        fs::write(path.join("dirgrab.txt"), "Tracked dirgrab output.")?;
594        run_test_command("git", &["add", "dirgrab.txt"], &path)?;
595        run_test_command("git", &["commit", "-m", "Add dirgrab.txt"], &path)?;
596
597        let config = GrabConfig {
598            target_path: path.clone(),
599            add_headers: false,
600            exclude_patterns: vec![],
601            include_untracked: false,     // Tracked only
602            include_default_output: true, // Include dirgrab.txt
603            no_git: false,                // Use Git
604            include_tree: false,
605            convert_pdf: false,
606            all_repo: false,
607        };
608        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
609        let expected_set = get_expected_set(
610            &path,
611            &[
612                ".gitignore",
613                "file2.rs",
614                "subdir/another.txt",
615                "deep/sub/nested.txt",
616                "dirgrab.txt", // Included because tracked and override flag set
617            ],
618        );
619        assert_paths_eq(files, expected_set);
620        Ok(())
621    }
622
623    #[test]
624    fn test_list_files_git_include_default_output_with_untracked() -> Result<()> {
625        let (_dir, path) = setup_test_dir()?;
626        if !setup_git_repo(&path)? {
627            println!("Skipping Git test: git not found or setup failed.");
628            return Ok(());
629        }
630        // dirgrab.txt is untracked in this setup
631        let config = GrabConfig {
632            target_path: path.clone(),
633            add_headers: false,
634            exclude_patterns: vec![],
635            include_untracked: true,      // Include untracked
636            include_default_output: true, // Include dirgrab.txt
637            no_git: false,                // Use Git
638            include_tree: false,
639            convert_pdf: false,
640            all_repo: false,
641        };
642        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
643        let expected_set = get_expected_set(
644            &path,
645            &[
646                ".gitignore",
647                "file2.rs",
648                "subdir/another.txt",
649                "deep/sub/nested.txt",
650                "untracked.txt", // Included
651                "dirgrab.txt",   // Included because untracked and override flag set
652            ],
653        );
654        assert_paths_eq(files, expected_set);
655        Ok(())
656    }
657
658    #[test]
659    fn test_list_files_git_include_default_output_but_excluded_by_user() -> Result<()> {
660        let (_dir, path) = setup_test_dir()?;
661        if !setup_git_repo(&path)? {
662            println!("Skipping Git test: git not found or setup failed.");
663            return Ok(());
664        }
665        let config = GrabConfig {
666            target_path: path.clone(),
667            add_headers: false,
668            exclude_patterns: vec!["dirgrab.txt".to_string()], // User explicitly excludes
669            include_untracked: true,
670            include_default_output: true, // Override default exclusion, but user exclusion takes precedence
671            no_git: false,                // Use Git
672            include_tree: false,
673            convert_pdf: false,
674            all_repo: false,
675        };
676        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
677        let expected_set = get_expected_set(
678            &path,
679            &[
680                ".gitignore",
681                "file2.rs",
682                "subdir/another.txt",
683                "deep/sub/nested.txt",
684                "untracked.txt",
685                // dirgrab.txt excluded by user pattern
686            ],
687        );
688        assert_paths_eq(files, expected_set);
689        Ok(())
690    }
691
692    #[test]
693    fn test_list_files_git_scoped_to_subdir() -> Result<()> {
694        let (_dir, path) = setup_test_dir()?;
695        if !setup_git_repo(&path)? {
696            println!("Skipping Git test: git not found or setup failed.");
697            return Ok(());
698        }
699
700        fs::write(path.join("deep/untracked_inside.txt"), "scoped content")?;
701
702        let config = GrabConfig {
703            target_path: path.join("deep"),
704            add_headers: false,
705            exclude_patterns: vec![],
706            include_untracked: true,
707            include_default_output: false,
708            no_git: false,
709            include_tree: false,
710            convert_pdf: false,
711            all_repo: false,
712        };
713        let scope = Path::new("deep");
714        let files = crate::listing::list_files_git(&path, &config, Some(scope))?;
715        let expected_set =
716            get_expected_set(&path, &["deep/sub/nested.txt", "deep/untracked_inside.txt"]);
717        assert_paths_eq(files, expected_set);
718        Ok(())
719    }
720
721    #[test]
722    fn test_no_git_flag_forces_walkdir_in_git_repo() -> Result<()> {
723        let (_dir, path) = setup_test_dir()?;
724        if !setup_git_repo(&path)? {
725            println!("Skipping Git test: git not found or setup failed.");
726            return Ok(());
727        }
728        let config = GrabConfig {
729            target_path: path.clone(),
730            add_headers: false, // No headers for easier content check
731            exclude_patterns: vec![],
732            include_untracked: false,      // No effect
733            include_default_output: false, // Exclude dirgrab.txt
734            no_git: true,                  // Force walkdir
735            include_tree: false,           // No tree for easier content check
736            convert_pdf: false,
737            all_repo: false,
738        };
739        let result_string = grab_contents(&config)?;
740
741        // Check content from files that would be ignored by git but included by walkdir
742        assert!(
743            result_string.contains("Content of file 1."),
744            "file1.txt content missing"
745        ); // Ignored by .gitignore, but walkdir includes
746        assert!(
747            result_string.contains("Log message."),
748            "file3.log content missing"
749        ); // Ignored by .gitignore, but walkdir includes
750        assert!(
751            result_string.contains("fn main() {}"),
752            "file2.rs content missing"
753        ); // Tracked by git, included by walkdir
754        assert!(
755            result_string.contains("Another text file."),
756            "another.txt content missing"
757        ); // Tracked by git, included by walkdir
758        assert!(
759            !result_string.contains("Previous dirgrab output."),
760            "dirgrab.txt included unexpectedly"
761        ); // Excluded by default
762
763        // The binary file binary.dat is skipped because it's not valid UTF-8.
764        // The processing function logs a warning. We don't need to assert its absence
765        // in the final string, as it cannot be represented in a valid Rust String anyway.
766        // The fact that grab_contents completes successfully and includes the text files is sufficient.
767
768        Ok(())
769    }
770
/// User-supplied exclude patterns must still be applied when `no_git` forces
/// the walkdir listing.
#[test]
fn test_no_git_flag_still_respects_exclude_patterns() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;
    let git_ready = setup_git_repo(&path)?;
    if !git_ready {
        println!("Skipping Git test: git not found or setup failed.");
        return Ok(());
    }

    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: false,
        // Drop every text and Rust source file.
        exclude_patterns: vec!["*.txt".to_string(), "*.rs".to_string()],
        include_untracked: false,
        include_default_output: false,
        // Bypass Git and walk the directory instead.
        no_git: true,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };
    let output = grab_contents(&config)?;

    // The log file matches neither pattern, so its content must survive.
    assert!(output.contains("Log message."), "file3.log missing");

    // Everything below is removed by `*.txt` or `*.rs` (dirgrab.txt is also
    // excluded by default).
    let must_be_absent = [
        ("Content of file 1.", "file1.txt included unexpectedly"),
        ("fn main() {}", "file2.rs included unexpectedly"),
        ("Another text file.", "another.txt included unexpectedly"),
        ("Nested content", "nested.txt included unexpectedly"),
        ("Previous dirgrab output.", "dirgrab.txt included unexpectedly"),
    ];
    for &(snippet, failure) in &must_be_absent {
        assert!(!output.contains(snippet), "{}", failure);
    }

    Ok(())
}
815
/// With `no_git` set, `include_default_output: true` must let the content of
/// `dirgrab.txt` through.
#[test]
fn test_no_git_flag_with_include_default_output() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;
    let git_ready = setup_git_repo(&path)?;
    if !git_ready {
        println!("Skipping Git test: git not found or setup failed.");
        return Ok(());
    }

    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: false,
        exclude_patterns: vec![],
        include_untracked: false,
        // Override the default exclusion of dirgrab.txt.
        include_default_output: true,
        // Bypass Git and walk the directory instead.
        no_git: true,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };

    let output = grab_contents(&config)?;
    let has_previous_output = output.contains("Previous dirgrab output.");
    assert!(
        has_previous_output,
        "Should include dirgrab.txt due to override"
    );
    Ok(())
}
841
/// In `no_git` mode, file headers must show paths relative to `target_path`.
#[test]
fn test_no_git_flag_headers_relative_to_target() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;
    let git_ready = setup_git_repo(&path)?;
    if !git_ready {
        println!("Skipping Git test: git not found or setup failed.");
        return Ok(());
    }

    let config = GrabConfig {
        // The target is the repository root itself.
        target_path: path.clone(),
        add_headers: true,
        // Trim the output down to the files whose headers are checked below.
        exclude_patterns: vec![
            "*.log".to_string(),
            "*.dat".to_string(),
            "dirgrab.txt".to_string(),
        ],
        include_untracked: false,
        include_default_output: false,
        no_git: true,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };
    let output = grab_contents(&config)?;

    // Renders the header line expected for a path relative to the target.
    let header_for = |rel: &str| format!("--- FILE: {} ---", Path::new(rel).display());

    // file1.txt is listed in .gitignore, yet it shows up because Git is bypassed.
    let file1_header = header_for("file1.txt");
    assert!(
        output.contains(&file1_header),
        "Header path should be relative to target_path. Expected '{}' in output:\n{}",
        file1_header,
        output
    );

    // A second top-level file.
    let file2_header = header_for("file2.rs");
    assert!(
        output.contains(&file2_header),
        "Header path should be relative to target_path. Expected '{}' in output:\n{}",
        file2_header,
        output
    );

    // A file two directories below the target.
    let nested_header = header_for("deep/sub/nested.txt");
    assert!(
        output.contains(&nested_header),
        "Nested header path relative to target_path. Expected '{}' in output:\n{}",
        nested_header,
        output
    );
    Ok(())
}
896
/// In Git mode with a subdirectory target, headers stay relative to the
/// repository root while the listing is scoped to the targeted subdirectory.
#[test]
fn test_git_mode_headers_relative_to_repo_root() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;
    let git_ready = setup_git_repo(&path)?;
    if !git_ready {
        println!("Skipping Git test: git not found or setup failed.");
        return Ok(());
    }

    // Point the grab at `deep/`, a directory inside the repository.
    let subdir_target = path.join("deep");
    fs::create_dir_all(&subdir_target)?;

    let config = GrabConfig {
        target_path: subdir_target,
        add_headers: true,
        exclude_patterns: vec![],
        // Tracked files only.
        include_untracked: false,
        include_default_output: false,
        // Use Git for the listing.
        no_git: false,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };
    let output = grab_contents(&config)?;

    // Renders the header line expected for a repo-root-relative path.
    let header_for = |rel: &str| format!("--- FILE: {} ---", Path::new(rel).display());

    // The nested file keeps its `deep/` prefix: the path is relative to the
    // repo root, not to the targeted subdirectory.
    let nested_header = header_for("deep/sub/nested.txt");
    assert!(
        output.contains(&nested_header),
        "Header path should be relative to repo root. Expected '{}' in output:\n{}",
        nested_header,
        output
    );

    // Files that live at the repo root are outside the targeted subdirectory
    // and must not appear in the scoped output.
    let gitignore_header = header_for(".gitignore");
    assert!(
        !output.contains(&gitignore_header),
        "Scoped results should not include repo-root files. Unexpected '{}' in output:\n{}",
        gitignore_header,
        output
    );
    let rs_header = header_for("file2.rs");
    assert!(
        !output.contains(&rs_header),
        "Scoped results should not include repo-root files. Unexpected '{}' in output:\n{}",
        rs_header,
        output
    );
    Ok(())
}
949
/// `include_tree` in `no_git` mode: the output must contain the directory tree
/// (with excludes applied), the contents separator, and the per-file sections.
#[test]
fn test_grab_contents_with_tree_no_git() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;

    // No Git repository is initialised here; these files are created by hand
    // so the directory layout matches what the Git-based tests work with.
    fs::write(path.join(".gitignore"), "*.log\nbinary.dat")?;
    fs::create_dir_all(path.join("deep/sub"))?;
    fs::write(path.join("deep/sub/nested.txt"), "Nested content")?;
    fs::write(path.join("untracked.txt"), "Untracked content")?;

    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: true,
        exclude_patterns: vec![
            "*.log".to_string(),
            "*.dat".to_string(),
            ".gitignore".to_string(),
            // Also excluded by default; listed explicitly for clarity.
            "dirgrab.txt".to_string(),
        ],
        include_untracked: false,
        include_default_output: false,
        // Bypass Git and walk the directory instead.
        no_git: true,
        // The flag under test.
        include_tree: true,
        convert_pdf: false,
        all_repo: false,
    };
    let output = grab_contents(&config)?;

    // Tree of the files left after the excludes above.
    let expected_tree_part = concat!(
        "---\n",
        "DIRECTORY STRUCTURE\n",
        "---\n",
        "- deep/\n",
        "  - sub/\n",
        "    - nested.txt\n",
        "- file1.txt\n",
        "- file2.rs\n",
        "- subdir/\n",
        "  - another.txt\n",
        "- untracked.txt\n",
    );
    assert!(
        output.contains(expected_tree_part),
        "Expected tree structure not found in output:\nTree Section:\n---\n{}\n---",
        output
            .split("---\nFILE CONTENTS\n---")
            .next()
            .unwrap_or("TREE NOT FOUND")
    );

    assert!(
        output.contains("\n---\nFILE CONTENTS\n---\n\n"),
        "Expected file content separator not found"
    );

    // Headers and bodies of files that were not excluded.
    let must_be_present = [
        ("--- FILE: file1.txt ---", "Header for file1.txt missing"),
        ("Content of file 1.", "Content of file1.txt missing"),
        (
            "--- FILE: deep/sub/nested.txt ---",
            "Header for nested.txt missing",
        ),
        ("Nested content", "Content of nested.txt missing"),
    ];
    for &(snippet, failure) in &must_be_present {
        assert!(output.contains(snippet), "{}", failure);
    }

    // Content of excluded files must not leak into the output.
    let must_be_absent = [
        (
            "Previous dirgrab output.",
            "dirgrab.txt content included unexpectedly",
        ),
        ("Log message", "Log content included unexpectedly"),
    ];
    for &(snippet, failure) in &must_be_absent {
        assert!(!output.contains(snippet), "{}", failure);
    }

    Ok(())
}
1035
/// `include_tree` in Git mode with untracked files included: the tree and the
/// per-file sections must cover tracked and untracked files but not
/// `.gitignore`, which is excluded explicitly.
#[test]
fn test_grab_contents_with_tree_git_mode() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;
    let git_ready = setup_git_repo(&path)?;
    if !git_ready {
        println!("Skipping Git test: git not found or setup failed.");
        return Ok(());
    }

    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: true,
        exclude_patterns: vec![".gitignore".to_string()],
        // Pull in files Git does not track yet.
        include_untracked: true,
        // dirgrab.txt stays excluded (the default).
        include_default_output: false,
        // Use Git for the listing.
        no_git: false,
        include_tree: true,
        convert_pdf: false,
        all_repo: false,
    };
    let output = grab_contents(&config)?;

    // Expected listing: file2.rs, another.txt, nested.txt and untracked.txt.
    let expected_tree_part = concat!(
        "---\n",
        "DIRECTORY STRUCTURE\n",
        "---\n",
        "- deep/\n",
        "  - sub/\n",
        "    - nested.txt\n",
        "- file2.rs\n",
        "- subdir/\n",
        "  - another.txt\n",
        "- untracked.txt\n",
    );
    assert!(
        output.contains(expected_tree_part),
        "Expected tree structure not found in output:\nTree Section:\n---\n{}\n---",
        output
            .split("---\nFILE CONTENTS\n---")
            .next()
            .unwrap_or("TREE NOT FOUND")
    );
    assert!(
        output.contains("\n---\nFILE CONTENTS\n---\n\n"),
        "Separator missing"
    );

    // Headers and bodies of a tracked and an untracked file.
    let must_be_present = [
        ("--- FILE: file2.rs ---", "file2.rs header missing"),
        ("fn main() {}", "file2.rs content missing"),
        ("--- FILE: untracked.txt ---", "untracked.txt header missing"),
        ("This file is not tracked.", "untracked.txt content missing"),
    ];
    for &(snippet, failure) in &must_be_present {
        assert!(output.contains(snippet), "{}", failure);
    }

    // The explicitly excluded file must not get a section.
    assert!(
        !output.contains("--- FILE: .gitignore ---"),
        ".gitignore included unexpectedly"
    );

    Ok(())
}
1103
/// When the exclude patterns remove every file, requesting a tree must yield
/// only the "no files selected" tree header and nothing else.
#[test]
fn test_grab_contents_with_tree_empty() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;

    // These two patterns together are meant to exclude everything.
    let exclude_everything = vec!["*".to_string(), "*/".to_string()];
    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: true,
        exclude_patterns: exclude_everything,
        include_untracked: true,
        include_default_output: true,
        // Bypass Git and walk the directory instead.
        no_git: true,
        // Request the tree section.
        include_tree: true,
        convert_pdf: false,
        all_repo: false,
    };

    let output = grab_contents(&config)?;

    // The whole output is exactly the empty-tree message.
    assert_eq!(
        output,
        "---\nDIRECTORY STRUCTURE (No files selected)\n---\n\n"
    );
    Ok(())
}
1125
1126    // Tests calling internal helpers need crate:: prefix
/// `generate_indented_tree` on a small two-directory project: the expected
/// output lists entries alphabetically, with directories suffixed by `/` and
/// children indented by two spaces.
#[test]
fn test_generate_indented_tree_simple() -> Result<()> {
    let tmp_dir = tempdir()?;
    // Base directory that the tree is rendered relative to.
    let base_in_tmp = tmp_dir.path().join("project");
    for dir in &["src", "tests"] {
        fs::create_dir_all(base_in_tmp.join(dir))?;
    }

    // Input order is deliberately not alphabetical.
    let files_in_tmp = ["src/main.rs", "README.md", "src/lib.rs", "tests/basic.rs"]
        .iter()
        .map(|rel| base_in_tmp.join(rel))
        .collect::<Vec<_>>();
    // The files are created on disk as empty files.
    for file in &files_in_tmp {
        fs::write(file, "")?;
    }

    let tree = crate::tree::generate_indented_tree(&files_in_tmp, &base_in_tmp)?;
    let expected = concat!(
        "- README.md\n",
        "- src/\n",
        "  - lib.rs\n",
        "  - main.rs\n",
        "- tests/\n",
        "  - basic.rs\n",
    );
    assert_eq!(tree, expected);
    Ok(())
}
1167
/// `generate_indented_tree` on a deeper hierarchy: nested directories, a
/// directory holding both a subdirectory and a file, and a top-level file.
#[test]
fn test_generate_indented_tree_deeper() -> Result<()> {
    let tmp_dir = tempdir()?;
    // Base directory that the tree is rendered relative to.
    let base_in_tmp = tmp_dir.path().join("project");
    for dir in &["a/b/c", "a/d"] {
        fs::create_dir_all(base_in_tmp.join(dir))?;
    }

    // Input order is deliberately not alphabetical.
    let files_in_tmp = [
        "a/b/c/file1.txt",
        "a/d/file2.txt",
        "top.txt",
        "a/b/file3.txt",
    ]
    .iter()
    .map(|rel| base_in_tmp.join(rel))
    .collect::<Vec<_>>();
    // The files are created on disk as empty files.
    for file in &files_in_tmp {
        fs::write(file, "")?;
    }

    let tree = crate::tree::generate_indented_tree(&files_in_tmp, &base_in_tmp)?;
    let expected = concat!(
        "- a/\n",
        "  - b/\n",
        "    - c/\n",
        "      - file1.txt\n",
        "    - file3.txt\n",
        "  - d/\n",
        "    - file2.txt\n",
        "- top.txt\n",
    );
    assert_eq!(tree, expected);
    Ok(())
}
1207
1208    // --- Tests for processing.rs (Updated to pass GrabConfig) ---
/// `process_files` without headers: bodies are concatenated, the binary file
/// is dropped, and each recorded segment's `body_range` slices its own body.
#[test]
fn test_process_files_no_headers_skip_binary() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;

    // binary.dat sits between two text files and is expected to be skipped
    // (per the fixture, it is not valid UTF-8).
    let inputs = vec![
        path.join("file1.txt"),
        path.join("binary.dat"),
        path.join("file2.rs"),
    ];
    // Minimal configuration; only `add_headers: false` matters for this test.
    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: false,
        exclude_patterns: vec![],
        include_untracked: false,
        include_default_output: false,
        no_git: true,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };

    let output = crate::processing::process_files(&inputs, &config, None, &path)?;

    assert_eq!(output.content, "Content of file 1.\n\nfn main() {}\n\n");
    assert_eq!(output.files.len(), 2);

    let first = &output.files[0];
    assert_eq!(first.display_path, "file1.txt");
    assert!(first.header_range.is_none());
    assert_eq!(
        &output.content[first.body_range.clone()],
        "Content of file 1.\n\n"
    );

    let second = &output.files[1];
    assert_eq!(
        &output.content[second.body_range.clone()],
        "fn main() {}\n\n"
    );
    Ok(())
}
1245
/// `process_files` with headers and an explicit repo root: each file gets a
/// header with its path relative to that root, and the recorded header/body
/// ranges slice the matching parts of the content.
#[test]
fn test_process_files_with_headers_git_mode() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;

    // No real Git repository is set up; the repo root is simply passed in.
    let inputs = vec![path.join("file1.txt"), path.join("file2.rs")];
    let config = GrabConfig {
        // Target and repo root coincide in this test.
        target_path: path.clone(),
        add_headers: true,
        exclude_patterns: vec![],
        include_untracked: false,
        include_default_output: false,
        no_git: false,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };

    let output =
        crate::processing::process_files(&inputs, &config, Some(path.as_path()), &path)?;

    // Header line for a path relative to the repo root.
    let header = |rel: &str| format!("--- FILE: {} ---\n", Path::new(rel).display());
    let expected_content = [
        header("file1.txt"),
        "Content of file 1.\n\n".to_string(),
        header("file2.rs"),
        "fn main() {}\n\n".to_string(),
    ]
    .concat();
    assert_eq!(output.content, expected_content);

    assert_eq!(output.files.len(), 2);
    assert!(output.files.iter().all(|seg| seg.header_range.is_some()));

    let first = &output.files[0];
    assert_eq!(first.display_path, "file1.txt");
    assert_eq!(
        &output.content[first.header_range.clone().unwrap()],
        "--- FILE: file1.txt ---\n"
    );
    assert_eq!(
        &output.content[first.body_range.clone()],
        "Content of file 1.\n\n"
    );
    Ok(())
}
1285
/// `process_files` with headers and no repo root: header paths are relative
/// to the target path passed in.
#[test]
fn test_process_files_headers_no_git_mode() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;

    let inputs = vec![path.join("file1.txt"), path.join("subdir/another.txt")];
    let config = GrabConfig {
        // The target path doubles as the base for header paths.
        target_path: path.clone(),
        add_headers: true,
        exclude_patterns: vec![],
        include_untracked: false,
        include_default_output: false,
        no_git: true,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };

    let output = crate::processing::process_files(&inputs, &config, None, &path)?;

    // Header line for a path relative to the target.
    let header = |rel: &str| format!("--- FILE: {} ---\n", Path::new(rel).display());
    let expected_content = [
        header("file1.txt"),
        "Content of file 1.\n\n".to_string(),
        header("subdir/another.txt"),
        "Another text file.\n\n".to_string(),
    ]
    .concat();

    assert_eq!(output.content, expected_content);
    assert_eq!(output.files.len(), 2);
    Ok(())
}
1311
/// With `convert_pdf: true`, a PDF in the target directory must be emitted
/// under an "(extracted text)" header with its text content, alongside the
/// ordinary text files.
///
/// Requires the checked-in fixture `tests/fixtures/sample.pdf`; the test fails
/// with an explicit error when the fixture is missing.
#[test]
fn test_grab_contents_with_pdf_conversion_enabled() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;

    // The fixture is only read. Nothing is created inside the source tree, so
    // a missing fixture is reported without leaving an empty directory behind.
    let fixture_pdf_src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("tests/fixtures")
        .join("sample.pdf");
    if !fixture_pdf_src.exists() {
        anyhow::bail!("Fixture PDF not found at {:?}", fixture_pdf_src);
    }

    let fixture_pdf_dest = path.join("sample.pdf");
    fs::copy(&fixture_pdf_src, &fixture_pdf_dest).with_context(|| {
        format!(
            "Failed to copy fixture PDF from {:?} to {:?}",
            fixture_pdf_src, fixture_pdf_dest
        )
    })?;

    fs::write(path.join("normal.txt"), "Normal text content.")?;

    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: true,
        // Exclude most fixture files to keep the output small.
        exclude_patterns: vec![
            "dirgrab.txt".into(),
            "*.log".into(),
            "*.dat".into(),
            "*.rs".into(),
            "subdir/".into(),
            ".gitignore".into(),
            "deep/".into(),
            "untracked.txt".into(),
        ],
        include_untracked: false,
        include_default_output: false,
        no_git: true,
        include_tree: false,
        // The flag under test.
        convert_pdf: true,
        all_repo: false,
    };

    let result_string = grab_contents(&config)?;

    // The PDF gets the dedicated "extracted text" header.
    let expected_pdf_header = "--- FILE: sample.pdf (extracted text) ---";
    assert!(
        result_string.contains(expected_pdf_header),
        "Missing or incorrect PDF header. Output:\n{}",
        result_string
    );

    // A phrase expected in the text extracted from the fixture PDF.
    let expected_pdf_content = "Pinaceae family";
    assert!(
        result_string.contains(expected_pdf_content),
        "Missing extracted PDF content ('{}'). Output:\n{}",
        expected_pdf_content,
        result_string
    );

    // The plain text file written above is still included, header and body.
    let expected_txt_header = "--- FILE: normal.txt ---";
    let expected_txt_content = "Normal text content.";
    assert!(
        result_string.contains(expected_txt_header),
        "Missing or incorrect TXT header. Output:\n{}",
        result_string
    );
    assert!(
        result_string.contains(expected_txt_content),
        "Missing TXT content. Output:\n{}",
        result_string
    );

    // file1.txt matches none of the exclude patterns, so it must be present.
    let expected_file1_header = "--- FILE: file1.txt ---";
    assert!(
        result_string.contains(expected_file1_header),
        "Missing file1.txt header. Output:\n{}",
        result_string
    );

    Ok(())
}
1404
/// With `convert_pdf: false`, a PDF in the target directory must not be
/// emitted at all (neither as extracted text nor under a regular header),
/// while ordinary text files are still included.
///
/// Uses the checked-in fixture `tests/fixtures/sample.pdf` when available and
/// otherwise falls back to a small dummy PDF written into the temporary test
/// directory only.
#[test]
fn test_grab_contents_with_pdf_conversion_disabled() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;
    let fixture_pdf_src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("tests/fixtures")
        .join("sample.pdf");
    let fixture_pdf_dest = path.join("sample.pdf");

    if fixture_pdf_src.exists() {
        fs::copy(&fixture_pdf_src, &fixture_pdf_dest).with_context(|| {
            format!(
                "Failed to copy fixture PDF from {:?} to {:?}",
                fixture_pdf_src, fixture_pdf_dest
            )
        })?;
    } else {
        // Fallback: write a dummy PDF straight into the temp dir. Writing it
        // into the source tree would leave a stray file behind, race with
        // tests running in parallel, and hand the "conversion enabled" test a
        // fixture that lacks the text it looks for.
        //
        // The second line is the conventional PDF binary-marker comment. It
        // makes the file invalid UTF-8, which is what the assertions below
        // rely on: the PDF is expected to be skipped as non-UTF-8. A purely
        // ASCII dummy would be readable as text.
        // NOTE(review): the skip-on-invalid-UTF-8 behavior lives in the
        // processing module, not shown here -- confirm.
        let mut dummy_pdf: Vec<u8> = b"%PDF-1.4\n%\xE2\xE3\xCF\xD3\n".to_vec();
        dummy_pdf.extend_from_slice(
            b"1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n2 0 obj<</Type/Pages/Count 1/Kids[3 0 R]>>endobj\n3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Contents 4 0 R/Resources<<>>>>endobj\n4 0 obj<</Length 52>>stream\nBT /F1 12 Tf 72 712 Td (This is sample PDF text content.) Tj ET\nendstream\nendobj\nxref\n0 5\n0000000000 65535 f \n0000000010 00000 n \n0000000063 00000 n \n0000000117 00000 n \n0000000198 00000 n \ntrailer<</Size 5/Root 1 0 R>>\nstartxref\n315\n%%EOF",
        );
        fs::write(&fixture_pdf_dest, &dummy_pdf)?;
        println!(
            "Created dummy sample.pdf for testing at {:?}",
            fixture_pdf_dest
        );
    }

    fs::write(path.join("normal.txt"), "Normal text content.")?;

    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: true,
        // Exclude most fixture files to keep the output small.
        exclude_patterns: vec![
            "dirgrab.txt".into(),
            "*.log".into(),
            "*.dat".into(),
            "*.rs".into(),
            "subdir/".into(),
            ".gitignore".into(),
            "deep/".into(),
            "untracked.txt".into(),
        ],
        include_untracked: false,
        include_default_output: false,
        no_git: true,
        include_tree: false,
        // The flag under test: PDF conversion is off.
        convert_pdf: false,
        all_repo: false,
    };

    let result_string = grab_contents(&config)?;

    // The PDF must not be processed as extracted text.
    let unexpected_pdf_header_part = "(extracted text)";
    // Text carried by the dummy PDF; it must not show up in the output.
    let unexpected_pdf_content = "This is sample PDF text content.";
    assert!(
        !result_string.contains(unexpected_pdf_header_part),
        "PDF extracted text header part present unexpectedly. Output:\n{}",
        result_string
    );
    assert!(
        !result_string.contains(unexpected_pdf_content),
        "Extracted PDF content present unexpectedly. Output:\n{}",
        result_string
    );

    // The plain text file written above is still included, header and body.
    let expected_txt_header = "--- FILE: normal.txt ---";
    let expected_txt_content = "Normal text content.";
    assert!(
        result_string.contains(expected_txt_header),
        "Missing or incorrect TXT header. Output:\n{}",
        result_string
    );
    assert!(
        result_string.contains(expected_txt_content),
        "Missing TXT content. Output:\n{}",
        result_string
    );

    // file1.txt matches none of the exclude patterns, so it must be present.
    let expected_file1_header = "--- FILE: file1.txt ---";
    assert!(
        result_string.contains(expected_file1_header),
        "Missing file1.txt header. Output:\n{}",
        result_string
    );

    // With conversion off, the PDF is expected to be skipped as non-UTF-8, so
    // the regular header must not appear either.
    let regular_pdf_header = "--- FILE: sample.pdf ---";
    assert!(
        !result_string.contains(regular_pdf_header),
        "Regular PDF header present when it should have been skipped as non-utf8. Output:\n{}",
        result_string
    );

    Ok(())
}
/// `list_files` must return relative display paths for the files that survive
/// the exclude patterns, and nothing matching an excluded pattern.
#[test]
fn test_list_files_returns_display_paths() -> Result<()> {
    let (_dir, path) = setup_test_dir()?;
    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: false,
        exclude_patterns: vec![
            "*.log".to_string(),
            "*.dat".to_string(),
            "dirgrab.txt".to_string(),
        ],
        include_untracked: false,
        include_default_output: false,
        no_git: true,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };
    let listed = list_files(&config)?;

    // Paths are reported relative to the target, including nested ones.
    for expected in &["file1.txt", "file2.rs", "subdir/another.txt"] {
        assert!(listed.contains(&expected.to_string()));
    }

    // Nothing matching an exclude pattern may be listed.
    assert!(listed.iter().all(|p| !p.ends_with(".log")));
    assert!(listed.iter().all(|p| !p.ends_with(".dat")));
    assert!(listed.iter().all(|p| !p.contains("dirgrab.txt")));

    Ok(())
}
1534
/// The walkdir listing must include a symlink that points at a file inside
/// the target directory, in addition to the file itself. Unix-only because it
/// relies on `std::os::unix::fs::symlink`.
#[cfg(unix)]
#[test]
fn test_walkdir_follows_symlinks() -> Result<()> {
    let dir = tempdir()?;
    let path = dir.path().to_path_buf();

    // One regular file plus a symlink to it, both inside the target.
    let real_file = path.join("real_file.txt");
    fs::write(&real_file, "real content")?;
    std::os::unix::fs::symlink(&real_file, path.join("link.txt"))?;

    let config = GrabConfig {
        target_path: path.clone(),
        add_headers: false,
        exclude_patterns: vec![],
        include_untracked: false,
        include_default_output: true,
        no_git: true,
        include_tree: false,
        convert_pdf: false,
        all_repo: false,
    };
    let files = crate::listing::list_files_walkdir(&path, &config)?;

    // True when any listed path has the given final component.
    let lists = |name: &str| {
        files
            .iter()
            .filter_map(|p| p.file_name())
            .any(|n| n.to_string_lossy() == name)
    };

    assert!(lists("real_file.txt"), "real file should be included");
    assert!(lists("link.txt"), "symlink should be followed and included");
    Ok(())
}
1571
1572    #[cfg(unix)]
1573    #[test]
1574    fn test_walkdir_rejects_symlinks_outside_target() -> Result<()> {
1575        let outer_dir = tempdir()?;
1576        let outside = outer_dir.path().join("outside");
1577        fs::create_dir_all(&outside)?;
1578        fs::write(outside.join("secret.txt"), "secret content")?;
1579
1580        let target = outer_dir.path().join("project");
1581        fs::create_dir_all(&target)?;
1582        fs::write(target.join("local.txt"), "local content")?;
1583
1584        // Create a symlink inside project/ that points to the outside directory
1585        std::os::unix::fs::symlink(&outside, target.join("escape_link"))?;
1586
1587        let config = GrabConfig {
1588            target_path: target.clone(),
1589            add_headers: false,
1590            exclude_patterns: vec![],
1591            include_untracked: false,
1592            include_default_output: true,
1593            no_git: true,
1594            include_tree: false,
1595            convert_pdf: false,
1596            all_repo: false,
1597        };
1598        let files = crate::listing::list_files_walkdir(&target, &config)?;
1599        let filenames: Vec<String> = files
1600            .iter()
1601            .filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
1602            .collect();
1603
1604        assert!(
1605            filenames.contains(&"local.txt".to_string()),
1606            "local file should be included"
1607        );
1608        assert!(
1609            !filenames.contains(&"secret.txt".to_string()),
1610            "file from outside target directory should NOT be included via symlink"
1611        );
1612        Ok(())
1613    }
1614
1615    #[test]
1616    fn test_pdf_failure_segment_consistency() -> Result<()> {
1617        let (_dir, path) = setup_test_dir()?;
1618        // Create a file with .pdf extension but invalid PDF content
1619        fs::write(path.join("bad.pdf"), "this is not a valid pdf")?;
1620        fs::write(path.join("good.txt"), "hello world")?;
1621
1622        let files = vec![path.join("bad.pdf"), path.join("good.txt")];
1623        let config = GrabConfig {
1624            target_path: path.clone(),
1625            add_headers: true,
1626            exclude_patterns: vec![],
1627            include_untracked: false,
1628            include_default_output: false,
1629            no_git: true,
1630            include_tree: false,
1631            convert_pdf: true, // Enable PDF extraction (will fail on bad.pdf)
1632            all_repo: false,
1633        };
1634
1635        let result = crate::processing::process_files(&files, &config, None, &path)?;
1636
1637        // Both files should produce segments
1638        assert_eq!(result.files.len(), 2, "Expected 2 file segments");
1639
1640        let pdf_seg = &result.files[0];
1641        let txt_seg = &result.files[1];
1642
1643        // PDF failure: header should end with \n (not \n\n)
1644        let header = &result.content[pdf_seg.header_range.clone().unwrap()];
1645        assert!(
1646            header.ends_with("---\n"),
1647            "PDF failure header should end with ---\\n, got: {:?}",
1648            header
1649        );
1650        // PDF failure: body_range should be non-empty (contains trailing \n)
1651        assert!(
1652            !pdf_seg.body_range.is_empty(),
1653            "PDF failure body_range should not be empty"
1654        );
1655        let body = &result.content[pdf_seg.body_range.clone()];
1656        assert_eq!(body, "\n", "PDF failure body should be a single newline");
1657
1658        // Successful file: header should also end with \n
1659        let txt_header = &result.content[txt_seg.header_range.clone().unwrap()];
1660        assert!(
1661            txt_header.ends_with("---\n"),
1662            "Normal header should end with ---\\n, got: {:?}",
1663            txt_header
1664        );
1665        // Successful file: body_range should be non-empty
1666        assert!(
1667            !txt_seg.body_range.is_empty(),
1668            "Normal body_range should not be empty"
1669        );
1670
1671        Ok(())
1672    }
1673} // End of mod tests