dirgrab_lib/
lib.rs

1#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
2
3// Declare modules
4mod config;
5mod errors;
6mod listing;
7mod processing;
8mod tree;
9mod utils;
10
11// Necessary imports for lib.rs itself
12use log::{debug, error, info, warn};
13use std::io; // For io::ErrorKind // For logging within grab_contents
14use std::path::{Path, PathBuf};
15
16// Re-export public API components
17pub use config::GrabConfig;
18pub use errors::{GrabError, GrabResult};
19
20// --- Main Public Function ---
21
22/// Performs the main `dirgrab` operation based on the provided configuration.
23pub fn grab_contents(config: &GrabConfig) -> GrabResult<String> {
24    info!("Starting dirgrab operation with config: {:?}", config);
25
26    // Canonicalize cleans the path and checks existence implicitly via OS call
27    let target_path = config.target_path.canonicalize().map_err(|e| {
28        if e.kind() == io::ErrorKind::NotFound {
29            GrabError::TargetPathNotFound(config.target_path.clone())
30        } else {
31            GrabError::IoError {
32                path: config.target_path.clone(),
33                source: e,
34            }
35        }
36    })?;
37    debug!("Canonical target path: {:?}", target_path);
38
39    // Determine file listing mode and potential repo root based on no_git flag
40    let (files_to_process, maybe_repo_root) = if config.no_git {
41        info!("Ignoring Git context due to --no-git flag.");
42        let files = listing::list_files_walkdir(&target_path, config)?;
43        (files, None)
44    } else {
45        let git_repo_root = listing::detect_git_repo(&target_path)?;
46        let scope_subdir = git_repo_root
47            .as_ref()
48            .and_then(|root| derive_scope_subdir(root, &target_path, config));
49
50        let files = match &git_repo_root {
51            Some(root) => {
52                info!("Operating in Git mode. Repo root: {:?}", root);
53                if let Some(scope) = scope_subdir.as_deref() {
54                    info!("Limiting Git file listing to sub-path: {:?}", scope);
55                } else if !config.all_repo {
56                    debug!(
57                        "Scope calculation yielded full repository; processing entire repo contents."
58                    );
59                }
60                listing::list_files_git(root, config, scope_subdir.as_deref())?
61            }
62            None => {
63                info!("Operating in Non-Git mode. Target path: {:?}", target_path);
64                listing::list_files_walkdir(&target_path, config)?
65            }
66        };
67        (files, git_repo_root)
68    };
69
70    info!("Found {} files to process.", files_to_process.len());
71
72    // Initialize output buffer
73    let mut output_buffer = String::new();
74
75    // Generate and prepend tree if requested
76    if config.include_tree {
77        if files_to_process.is_empty() {
78            warn!("--include-tree specified, but no files were selected for processing. Tree will be empty.");
79            // Keep explicit tree header even if empty
80            output_buffer.push_str("---\nDIRECTORY STRUCTURE (No files selected)\n---\n\n");
81            // Don't return early here if we might still process files (though files_to_process is empty...)
82            // Let's adjust: return here ONLY if tree requested AND no files.
83            return Ok(output_buffer);
84        } else {
85            // Determine base path for tree (repo root if git mode, target path otherwise)
86            let base_path_for_tree = if !config.no_git && maybe_repo_root.is_some() {
87                maybe_repo_root.as_deref().unwrap() // Safe unwrap due to is_some() check
88            } else {
89                &target_path
90            };
91            debug!(
92                "Generating directory tree relative to: {:?}",
93                base_path_for_tree
94            );
95
96            match tree::generate_indented_tree(&files_to_process, base_path_for_tree) {
97                Ok(tree_str) => {
98                    output_buffer.push_str("---\nDIRECTORY STRUCTURE\n---\n");
99                    output_buffer.push_str(&tree_str);
100                    output_buffer.push_str("\n---\nFILE CONTENTS\n---\n\n");
101                }
102                Err(e) => {
103                    error!("Failed to generate directory tree: {}", e);
104                    // Still add header indicating failure
105                    output_buffer.push_str("---\nERROR GENERATING DIRECTORY STRUCTURE\n---\n\n");
106                }
107            }
108        }
109    }
110
111    // Process files and append content (only if files exist)
112    if !files_to_process.is_empty() {
113        // Updated call to process_files to pass the whole config struct
114        match processing::process_files(
115            &files_to_process,
116            config, // Pass config struct
117            maybe_repo_root.as_deref(),
118            &target_path,
119        ) {
120            Ok(content) => output_buffer.push_str(&content),
121            Err(e) => {
122                error!("Failed during file content processing: {}", e);
123                return Err(e); // Propagate error if processing fails fundamentally
124            }
125        }
126    } else if !config.include_tree {
127        // If no files AND no tree was requested
128        warn!("No files selected for processing based on current configuration.");
129        // Return empty string only if no files were found AND tree wasn't requested/generated.
130        return Ok(String::new());
131    }
132
133    // Return the combined buffer (might contain only tree, or tree + content, or just content)
134    Ok(output_buffer)
135}
136
137fn derive_scope_subdir(
138    repo_root: &Path,
139    target_path: &Path,
140    config: &GrabConfig,
141) -> Option<PathBuf> {
142    if config.all_repo {
143        return None;
144    }
145
146    match target_path.strip_prefix(repo_root) {
147        Ok(rel) => {
148            if rel.as_os_str().is_empty() {
149                None
150            } else {
151                Some(rel.to_path_buf())
152            }
153        }
154        Err(_) => None,
155    }
156}
157
// The remainder of this file (dirgrab-lib/src/lib.rs) is the unit-test module.
// It relies on the implementation and re-exports defined above.
162
163// --- Tests ---
164#[cfg(test)]
165mod tests {
166    // Use super::* to bring everything from lib.rs into scope for tests
167    // This now includes GrabConfig, GrabError, GrabResult because they are re-exported.
168    use super::*;
169    // Also need direct imports for helpers/types used *only* in tests
170    use anyhow::{Context, Result}; // Ensure Context and Result are imported from anyhow
171    use std::collections::HashSet;
172    use std::fs::{self}; // Ensure File is imported if needed by helpers
173    use std::path::{Path, PathBuf}; // Need these for helpers defined within tests mod
174    use std::process::Command;
175    use tempfile::{tempdir, TempDir};
176
177    // --- Test Setup Helpers ---
178    fn setup_test_dir() -> Result<(TempDir, PathBuf)> {
179        let dir = tempdir()?;
180        let path = dir.path().to_path_buf();
181
182        fs::write(path.join("file1.txt"), "Content of file 1.")?;
183        fs::write(path.join("file2.rs"), "fn main() {}")?;
184        fs::create_dir_all(path.join("subdir"))?; // Use create_dir_all
185        fs::write(path.join("subdir").join("file3.log"), "Log message.")?;
186        fs::write(
187            path.join("subdir").join("another.txt"),
188            "Another text file.",
189        )?;
190        fs::write(path.join("binary.dat"), [0x80, 0x81, 0x82])?;
191        fs::write(path.join("dirgrab.txt"), "Previous dirgrab output.")?;
192        Ok((dir, path))
193    }
194
195    fn setup_git_repo(path: &Path) -> Result<bool> {
196        if Command::new("git").arg("--version").output().is_err() {
197            eprintln!("WARN: 'git' command not found, skipping Git-related test setup.");
198            return Ok(false);
199        }
200        // Use crate:: path now because utils is not in super::* scope
201        crate::utils::run_command("git", &["init", "-b", "main"], path)?;
202        crate::utils::run_command("git", &["config", "user.email", "test@example.com"], path)?;
203        crate::utils::run_command("git", &["config", "user.name", "Test User"], path)?;
204        // Configure Git to handle potential CRLF issues on Windows in tests if needed
205        crate::utils::run_command("git", &["config", "core.autocrlf", "false"], path)?;
206
207        fs::write(path.join(".gitignore"), "*.log\nbinary.dat\nfile1.txt")?;
208        crate::utils::run_command(
209            "git",
210            &["add", ".gitignore", "file2.rs", "subdir/another.txt"],
211            path,
212        )?;
213        crate::utils::run_command("git", &["commit", "-m", "Initial commit"], path)?;
214
215        fs::write(path.join("untracked.txt"), "This file is not tracked.")?;
216        fs::write(path.join("ignored.log"), "This should be ignored by git.")?;
217        fs::create_dir_all(path.join("deep/sub"))?;
218        fs::write(path.join("deep/sub/nested.txt"), "Nested content")?;
219        crate::utils::run_command("git", &["add", "deep/sub/nested.txt"], path)?;
220        crate::utils::run_command("git", &["commit", "-m", "Add nested file"], path)?;
221        Ok(true)
222    }
223
224    fn run_test_command(
225        cmd: &str,
226        args: &[&str],
227        current_dir: &Path,
228    ) -> Result<std::process::Output> {
229        let output = crate::utils::run_command(cmd, args, current_dir)?;
230        if !output.status.success() {
231            let stderr = String::from_utf8_lossy(&output.stderr);
232            let stdout = String::from_utf8_lossy(&output.stdout);
233            anyhow::bail!(
234                "Command failed: {} {:?}\nStatus: {}\nStdout: {}\nStderr: {}",
235                cmd,
236                args,
237                output.status,
238                stdout,
239                stderr
240            );
241        }
242        Ok(output)
243    }
244
245    fn get_expected_set(base_path: &Path, relative_paths: &[&str]) -> HashSet<PathBuf> {
246        relative_paths.iter().map(|p| base_path.join(p)).collect()
247    }
248
249    fn assert_paths_eq(actual: Vec<PathBuf>, expected: HashSet<PathBuf>) {
250        let actual_set: HashSet<PathBuf> = actual.into_iter().collect();
251        assert_eq!(
252            actual_set, expected,
253            "Path sets differ.\nActual paths: {:?}\nExpected paths: {:?}",
254            actual_set, expected
255        );
256    }
257
258    // --- Tests ---
259    // Tests calling listing functions need crate:: prefix
260    #[test]
261    fn test_detect_git_repo_inside() -> Result<()> {
262        let (_dir, path) = setup_test_dir()?;
263        if !setup_git_repo(&path)? {
264            println!("Skipping Git test: git not found or setup failed.");
265            return Ok(());
266        }
267        let maybe_root = crate::listing::detect_git_repo(&path)?; // Use crate:: path
268        assert!(maybe_root.is_some());
269        assert_eq!(maybe_root.unwrap().canonicalize()?, path.canonicalize()?);
270        let subdir_path = path.join("subdir");
271        let maybe_root_from_subdir = crate::listing::detect_git_repo(&subdir_path)?; // Use crate:: path
272        assert!(maybe_root_from_subdir.is_some());
273        assert_eq!(
274            maybe_root_from_subdir.unwrap().canonicalize()?,
275            path.canonicalize()?
276        );
277        Ok(())
278    }
279
280    #[test]
281    fn test_detect_git_repo_outside() -> Result<()> {
282        let (_dir, path) = setup_test_dir()?;
283        // Ensure no git repo exists here
284        let maybe_root = crate::listing::detect_git_repo(&path)?; // Use crate:: path
285        assert!(maybe_root.is_none());
286        Ok(())
287    }
288
289    #[test]
290    fn test_list_files_walkdir_no_exclude_default_excludes_dirgrab_txt() -> Result<()> {
291        let (_dir, path) = setup_test_dir()?;
292        let config = GrabConfig {
293            target_path: path.clone(),
294            add_headers: false,
295            exclude_patterns: vec![],
296            include_untracked: false,      // No effect in walkdir
297            include_default_output: false, // Exclude dirgrab.txt
298            no_git: true,                  // Force walkdir
299            include_tree: false,
300            convert_pdf: false,
301            all_repo: false,
302        };
303        let files = crate::listing::list_files_walkdir(&path, &config)?; // Use crate:: path
304        let expected_set = get_expected_set(
305            &path,
306            &[
307                "file1.txt",
308                "file2.rs",
309                "subdir/file3.log",
310                "subdir/another.txt",
311                "binary.dat",
312                // "dirgrab.txt" should be excluded by default
313            ],
314        );
315        assert_paths_eq(files, expected_set);
316        Ok(())
317    }
318
319    #[test]
320    fn test_list_files_walkdir_with_exclude() -> Result<()> {
321        let (_dir, path) = setup_test_dir()?;
322        let config = GrabConfig {
323            target_path: path.clone(),
324            add_headers: false,
325            exclude_patterns: vec!["*.log".to_string(), "subdir/".to_string()], // User excludes
326            include_untracked: false,
327            include_default_output: false,
328            no_git: true, // Force walkdir
329            include_tree: false,
330            convert_pdf: false,
331            all_repo: false,
332        };
333        let files = crate::listing::list_files_walkdir(&path, &config)?; // Use crate:: path
334        let expected_set = get_expected_set(
335            &path,
336            &[
337                "file1.txt",
338                "file2.rs",
339                "binary.dat",
340                // subdir/* excluded
341                // dirgrab.txt excluded by default
342            ],
343        );
344        assert_paths_eq(files, expected_set);
345        Ok(())
346    }
347
348    #[test]
349    fn test_list_files_git_tracked_only_default_excludes_dirgrab_txt() -> Result<()> {
350        let (_dir, path) = setup_test_dir()?;
351        if !setup_git_repo(&path)? {
352            println!("Skipping Git test: git not found or setup failed.");
353            return Ok(());
354        }
355        let config = GrabConfig {
356            target_path: path.clone(), // Target doesn't matter as much as root for list_files_git
357            add_headers: false,
358            exclude_patterns: vec![],
359            include_untracked: false,      // Tracked only
360            include_default_output: false, // Exclude dirgrab.txt
361            no_git: false,                 // Use Git
362            include_tree: false,
363            convert_pdf: false,
364            all_repo: false,
365        };
366        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path, pass repo root
367        let expected_set = get_expected_set(
368            &path,
369            &[
370                ".gitignore",
371                "file2.rs",
372                "subdir/another.txt",
373                "deep/sub/nested.txt",
374                // file1.txt ignored by .gitignore
375                // file3.log ignored by .gitignore
376                // binary.dat ignored by .gitignore
377                // dirgrab.txt not tracked and default excluded
378                // untracked.txt not tracked
379                // ignored.log not tracked
380            ],
381        );
382        assert_paths_eq(files, expected_set);
383        Ok(())
384    }
385
386    #[test]
387    fn test_list_files_git_include_untracked_default_excludes_dirgrab_txt() -> Result<()> {
388        let (_dir, path) = setup_test_dir()?;
389        if !setup_git_repo(&path)? {
390            println!("Skipping Git test: git not found or setup failed.");
391            return Ok(());
392        }
393        let config = GrabConfig {
394            target_path: path.clone(),
395            add_headers: false,
396            exclude_patterns: vec![],
397            include_untracked: true,       // Include untracked
398            include_default_output: false, // Exclude dirgrab.txt
399            no_git: false,                 // Use Git
400            include_tree: false,
401            convert_pdf: false,
402            all_repo: false,
403        };
404        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
405        let expected_set = get_expected_set(
406            &path,
407            &[
408                ".gitignore",
409                "file2.rs",
410                "subdir/another.txt",
411                "deep/sub/nested.txt",
412                "untracked.txt", // Included now
413                                 // file1.txt ignored by .gitignore
414                                 // file3.log ignored by .gitignore
415                                 // binary.dat ignored by .gitignore
416                                 // ignored.log ignored by .gitignore (via --exclude-standard)
417                                 // dirgrab.txt untracked and default excluded
418            ],
419        );
420        assert_paths_eq(files, expected_set);
421        Ok(())
422    }
423
424    #[test]
425    fn test_list_files_git_with_exclude() -> Result<()> {
426        let (_dir, path) = setup_test_dir()?;
427        if !setup_git_repo(&path)? {
428            println!("Skipping Git test: git not found or setup failed.");
429            return Ok(());
430        }
431        let config = GrabConfig {
432            target_path: path.clone(),
433            add_headers: false,
434            exclude_patterns: vec![
435                "*.rs".to_string(),    // Exclude rust files
436                "subdir/".to_string(), // Exclude subdir/
437                "deep/".to_string(),   // Exclude deep/
438            ],
439            include_untracked: false, // Tracked only
440            include_default_output: false,
441            no_git: false, // Use Git
442            include_tree: false,
443            convert_pdf: false,
444            all_repo: false,
445        };
446        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
447        let expected_set = get_expected_set(&path, &[".gitignore"]); // Only .gitignore remains
448        assert_paths_eq(files, expected_set);
449        Ok(())
450    }
451
452    #[test]
453    fn test_list_files_git_untracked_with_exclude() -> Result<()> {
454        let (_dir, path) = setup_test_dir()?;
455        if !setup_git_repo(&path)? {
456            println!("Skipping Git test: git not found or setup failed.");
457            return Ok(());
458        }
459        let config = GrabConfig {
460            target_path: path.clone(),
461            add_headers: false,
462            exclude_patterns: vec!["*.txt".to_string()], // Exclude all .txt files
463            include_untracked: true,                     // Include untracked
464            include_default_output: false,
465            no_git: false, // Use Git
466            include_tree: false,
467            convert_pdf: false,
468            all_repo: false,
469        };
470        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
471        let expected_set = get_expected_set(
472            &path,
473            &[
474                ".gitignore",
475                "file2.rs",
476                // subdir/another.txt excluded by *.txt
477                // deep/sub/nested.txt excluded by *.txt
478                // untracked.txt excluded by *.txt
479                // dirgrab.txt excluded by default
480            ],
481        );
482        assert_paths_eq(files, expected_set);
483        Ok(())
484    }
485
486    #[test]
487    fn test_list_files_walkdir_include_default_output() -> Result<()> {
488        let (_dir, path) = setup_test_dir()?;
489        let config = GrabConfig {
490            target_path: path.clone(),
491            add_headers: false,
492            exclude_patterns: vec![],
493            include_untracked: false,
494            include_default_output: true, // Include dirgrab.txt
495            no_git: true,                 // Force walkdir
496            include_tree: false,
497            convert_pdf: false,
498            all_repo: false,
499        };
500        let files = crate::listing::list_files_walkdir(&path, &config)?; // Use crate:: path
501        let expected_set = get_expected_set(
502            &path,
503            &[
504                "file1.txt",
505                "file2.rs",
506                "subdir/file3.log",
507                "subdir/another.txt",
508                "binary.dat",
509                "dirgrab.txt", // Included now
510            ],
511        );
512        assert_paths_eq(files, expected_set);
513        Ok(())
514    }
515
516    #[test]
517    fn test_list_files_git_include_default_output_tracked_only() -> Result<()> {
518        let (_dir, path) = setup_test_dir()?;
519        if !setup_git_repo(&path)? {
520            println!("Skipping Git test: git not found or setup failed.");
521            return Ok(());
522        }
523        // Make dirgrab.txt tracked
524        fs::write(path.join("dirgrab.txt"), "Tracked dirgrab output.")?;
525        run_test_command("git", &["add", "dirgrab.txt"], &path)?;
526        run_test_command("git", &["commit", "-m", "Add dirgrab.txt"], &path)?;
527
528        let config = GrabConfig {
529            target_path: path.clone(),
530            add_headers: false,
531            exclude_patterns: vec![],
532            include_untracked: false,     // Tracked only
533            include_default_output: true, // Include dirgrab.txt
534            no_git: false,                // Use Git
535            include_tree: false,
536            convert_pdf: false,
537            all_repo: false,
538        };
539        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
540        let expected_set = get_expected_set(
541            &path,
542            &[
543                ".gitignore",
544                "file2.rs",
545                "subdir/another.txt",
546                "deep/sub/nested.txt",
547                "dirgrab.txt", // Included because tracked and override flag set
548            ],
549        );
550        assert_paths_eq(files, expected_set);
551        Ok(())
552    }
553
554    #[test]
555    fn test_list_files_git_include_default_output_with_untracked() -> Result<()> {
556        let (_dir, path) = setup_test_dir()?;
557        if !setup_git_repo(&path)? {
558            println!("Skipping Git test: git not found or setup failed.");
559            return Ok(());
560        }
561        // dirgrab.txt is untracked in this setup
562        let config = GrabConfig {
563            target_path: path.clone(),
564            add_headers: false,
565            exclude_patterns: vec![],
566            include_untracked: true,      // Include untracked
567            include_default_output: true, // Include dirgrab.txt
568            no_git: false,                // Use Git
569            include_tree: false,
570            convert_pdf: false,
571            all_repo: false,
572        };
573        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
574        let expected_set = get_expected_set(
575            &path,
576            &[
577                ".gitignore",
578                "file2.rs",
579                "subdir/another.txt",
580                "deep/sub/nested.txt",
581                "untracked.txt", // Included
582                "dirgrab.txt",   // Included because untracked and override flag set
583            ],
584        );
585        assert_paths_eq(files, expected_set);
586        Ok(())
587    }
588
589    #[test]
590    fn test_list_files_git_include_default_output_but_excluded_by_user() -> Result<()> {
591        let (_dir, path) = setup_test_dir()?;
592        if !setup_git_repo(&path)? {
593            println!("Skipping Git test: git not found or setup failed.");
594            return Ok(());
595        }
596        let config = GrabConfig {
597            target_path: path.clone(),
598            add_headers: false,
599            exclude_patterns: vec!["dirgrab.txt".to_string()], // User explicitly excludes
600            include_untracked: true,
601            include_default_output: true, // Override default exclusion, but user exclusion takes precedence
602            no_git: false,                // Use Git
603            include_tree: false,
604            convert_pdf: false,
605            all_repo: false,
606        };
607        let files = crate::listing::list_files_git(&path, &config, None)?; // Use crate:: path
608        let expected_set = get_expected_set(
609            &path,
610            &[
611                ".gitignore",
612                "file2.rs",
613                "subdir/another.txt",
614                "deep/sub/nested.txt",
615                "untracked.txt",
616                // dirgrab.txt excluded by user pattern
617            ],
618        );
619        assert_paths_eq(files, expected_set);
620        Ok(())
621    }
622
623    #[test]
624    fn test_list_files_git_scoped_to_subdir() -> Result<()> {
625        let (_dir, path) = setup_test_dir()?;
626        if !setup_git_repo(&path)? {
627            println!("Skipping Git test: git not found or setup failed.");
628            return Ok(());
629        }
630
631        fs::write(path.join("deep/untracked_inside.txt"), "scoped content")?;
632
633        let config = GrabConfig {
634            target_path: path.join("deep"),
635            add_headers: false,
636            exclude_patterns: vec![],
637            include_untracked: true,
638            include_default_output: false,
639            no_git: false,
640            include_tree: false,
641            convert_pdf: false,
642            all_repo: false,
643        };
644        let scope = Path::new("deep");
645        let files = crate::listing::list_files_git(&path, &config, Some(scope))?;
646        let expected_set =
647            get_expected_set(&path, &["deep/sub/nested.txt", "deep/untracked_inside.txt"]);
648        assert_paths_eq(files, expected_set);
649        Ok(())
650    }
651
652    #[test]
653    fn test_no_git_flag_forces_walkdir_in_git_repo() -> Result<()> {
654        let (_dir, path) = setup_test_dir()?;
655        if !setup_git_repo(&path)? {
656            println!("Skipping Git test: git not found or setup failed.");
657            return Ok(());
658        }
659        let config = GrabConfig {
660            target_path: path.clone(),
661            add_headers: false, // No headers for easier content check
662            exclude_patterns: vec![],
663            include_untracked: false,      // No effect
664            include_default_output: false, // Exclude dirgrab.txt
665            no_git: true,                  // Force walkdir
666            include_tree: false,           // No tree for easier content check
667            convert_pdf: false,
668            all_repo: false,
669        };
670        let result_string = grab_contents(&config)?;
671
672        // Check content from files that would be ignored by git but included by walkdir
673        assert!(
674            result_string.contains("Content of file 1."),
675            "file1.txt content missing"
676        ); // Ignored by .gitignore, but walkdir includes
677        assert!(
678            result_string.contains("Log message."),
679            "file3.log content missing"
680        ); // Ignored by .gitignore, but walkdir includes
681        assert!(
682            result_string.contains("fn main() {}"),
683            "file2.rs content missing"
684        ); // Tracked by git, included by walkdir
685        assert!(
686            result_string.contains("Another text file."),
687            "another.txt content missing"
688        ); // Tracked by git, included by walkdir
689        assert!(
690            !result_string.contains("Previous dirgrab output."),
691            "dirgrab.txt included unexpectedly"
692        ); // Excluded by default
693
694        // The binary file binary.dat is skipped because it's not valid UTF-8.
695        // The processing function logs a warning. We don't need to assert its absence
696        // in the final string, as it cannot be represented in a valid Rust String anyway.
697        // The fact that grab_contents completes successfully and includes the text files is sufficient.
698
699        Ok(())
700    }
701
702    #[test]
703    fn test_no_git_flag_still_respects_exclude_patterns() -> Result<()> {
704        let (_dir, path) = setup_test_dir()?;
705        if !setup_git_repo(&path)? {
706            println!("Skipping Git test: git not found or setup failed.");
707            return Ok(());
708        }
709        let config = GrabConfig {
710            target_path: path.clone(),
711            add_headers: false,
712            exclude_patterns: vec!["*.txt".to_string(), "*.rs".to_string()], // Exclude .txt and .rs
713            include_untracked: false,
714            include_default_output: false,
715            no_git: true, // Force walkdir
716            include_tree: false,
717            convert_pdf: false,
718            all_repo: false,
719        };
720        let result_string = grab_contents(&config)?;
721
722        assert!(result_string.contains("Log message."), "file3.log missing"); // Included
723        assert!(
724            !result_string.contains("Content of file 1."),
725            "file1.txt included unexpectedly"
726        ); // Excluded by *.txt
727        assert!(
728            !result_string.contains("fn main() {}"),
729            "file2.rs included unexpectedly"
730        ); // Excluded by *.rs
731        assert!(
732            !result_string.contains("Another text file."),
733            "another.txt included unexpectedly"
734        ); // Excluded by *.txt
735        assert!(
736            !result_string.contains("Nested content"),
737            "nested.txt included unexpectedly"
738        ); // Excluded by *.txt
739        assert!(
740            !result_string.contains("Previous dirgrab output."),
741            "dirgrab.txt included unexpectedly"
742        ); // Excluded by default & *.txt
743
744        Ok(())
745    }
746
747    #[test]
748    fn test_no_git_flag_with_include_default_output() -> Result<()> {
749        let (_dir, path) = setup_test_dir()?;
750        if !setup_git_repo(&path)? {
751            println!("Skipping Git test: git not found or setup failed.");
752            return Ok(());
753        }
754        let config = GrabConfig {
755            target_path: path.clone(),
756            add_headers: false,
757            exclude_patterns: vec![],
758            include_untracked: false,
759            include_default_output: true, // Include dirgrab.txt
760            no_git: true,                 // Force walkdir
761            include_tree: false,
762            convert_pdf: false,
763            all_repo: false,
764        };
765        let result_string = grab_contents(&config)?;
766        assert!(
767            result_string.contains("Previous dirgrab output."),
768            "Should include dirgrab.txt due to override"
769        );
770        Ok(())
771    }
772
773    #[test]
774    fn test_no_git_flag_headers_relative_to_target() -> Result<()> {
775        let (_dir, path) = setup_test_dir()?;
776        if !setup_git_repo(&path)? {
777            println!("Skipping Git test: git not found or setup failed.");
778            return Ok(());
779        }
780        let config = GrabConfig {
781            target_path: path.clone(), // Target is repo root
782            add_headers: true,         // Enable headers
783            exclude_patterns: vec![
784                "*.log".to_string(),
785                "*.dat".to_string(),
786                "dirgrab.txt".to_string(),
787            ], // Simplify output
788            include_untracked: false,
789            include_default_output: false,
790            no_git: true,        // Force walkdir
791            include_tree: false, // No tree
792            convert_pdf: false,
793            all_repo: false,
794        };
795        let result_string = grab_contents(&config)?;
796
797        // file1.txt is ignored by .gitignore but included here because no_git=true
798        let expected_header_f1 = format!("--- FILE: {} ---", Path::new("file1.txt").display());
799        assert!(
800            result_string.contains(&expected_header_f1),
801            "Header path should be relative to target_path. Expected '{}' in output:\n{}",
802            expected_header_f1,
803            result_string
804        );
805
806        // .gitignore itself is not usually listed by walkdir unless explicitly targeted? Let's check file2.rs
807        let expected_header_f2 = format!("--- FILE: {} ---", Path::new("file2.rs").display());
808        assert!(
809            result_string.contains(&expected_header_f2),
810            "Header path should be relative to target_path. Expected '{}' in output:\n{}",
811            expected_header_f2,
812            result_string
813        );
814
815        let expected_nested_header = format!(
816            "--- FILE: {} ---",
817            Path::new("deep/sub/nested.txt").display()
818        );
819        assert!(
820            result_string.contains(&expected_nested_header),
821            "Nested header path relative to target_path. Expected '{}' in output:\n{}",
822            expected_nested_header,
823            result_string
824        );
825        Ok(())
826    }
827
828    #[test]
829    fn test_git_mode_headers_relative_to_repo_root() -> Result<()> {
830        let (_dir, path) = setup_test_dir()?;
831        if !setup_git_repo(&path)? {
832            println!("Skipping Git test: git not found or setup failed.");
833            return Ok(());
834        }
835        let subdir_target = path.join("deep"); // Target is inside the repo
836        fs::create_dir_all(&subdir_target)?; // Ensure target exists
837
838        let config = GrabConfig {
839            target_path: subdir_target.clone(), // Target is 'deep' subdir
840            add_headers: true,                  // Enable headers
841            exclude_patterns: vec![],
842            include_untracked: false, // Tracked only
843            include_default_output: false,
844            no_git: false,       // Use Git mode
845            include_tree: false, // No tree
846            convert_pdf: false,
847            all_repo: false,
848        };
849        let result_string = grab_contents(&config)?; // Should still find files relative to repo root
850
851        // Check headers are relative to repo root (path), not target_path (subdir_target)
852        let expected_nested_header = format!(
853            "--- FILE: {} ---",
854            Path::new("deep/sub/nested.txt").display()
855        );
856        assert!(
857            result_string.contains(&expected_nested_header),
858            "Header path should be relative to repo root. Expected '{}' in output:\n{}",
859            expected_nested_header,
860            result_string
861        );
862
863        // Check other files outside the target dir are also included and relative to root
864        let unexpected_root_header = format!("--- FILE: {} ---", Path::new(".gitignore").display());
865        assert!(
866            !result_string.contains(&unexpected_root_header),
867            "Scoped results should not include repo-root files. Unexpected '{}' in output:\n{}",
868            unexpected_root_header,
869            result_string
870        );
871        let unexpected_rs_header = format!("--- FILE: {} ---", Path::new("file2.rs").display());
872        assert!(
873            !result_string.contains(&unexpected_rs_header),
874            "Scoped results should not include repo-root files. Unexpected '{}' in output:\n{}",
875            unexpected_rs_header,
876            result_string
877        );
878        Ok(())
879    }
880
881    #[test]
882    fn test_grab_contents_with_tree_no_git() -> Result<()> {
883        let (_dir, path) = setup_test_dir()?;
884        // Don't need git repo setup for no_git test, but keep files consistent
885        fs::write(path.join(".gitignore"), "*.log\nbinary.dat")?; // Create dummy .gitignore
886        fs::create_dir_all(path.join("deep/sub"))?;
887        fs::write(path.join("deep/sub/nested.txt"), "Nested content")?;
888        fs::write(path.join("untracked.txt"), "Untracked content")?; // File exists
889
890        let config = GrabConfig {
891            target_path: path.clone(),
892            add_headers: true,
893            exclude_patterns: vec![
894                "*.log".to_string(),       // Exclude logs
895                "*.dat".to_string(),       // Exclude binary
896                ".gitignore".to_string(),  // Exclude .gitignore itself
897                "dirgrab.txt".to_string(), // Exclude default output file explicitly too
898            ],
899            include_untracked: false,      // No effect
900            include_default_output: false, // Also excluded above
901            no_git: true,                  // Force walkdir
902            include_tree: true,            // THE flag to test
903            convert_pdf: false,
904            all_repo: false,
905        };
906        let result = grab_contents(&config)?;
907
908        // Expected tree for walkdir with excludes applied
909        // file1.txt, file2.rs, another.txt, nested.txt, untracked.txt should remain
910        let expected_tree_part = "\
911---
912DIRECTORY STRUCTURE
913---
914- deep/
915  - sub/
916    - nested.txt
917- file1.txt
918- file2.rs
919- subdir/
920  - another.txt
921- untracked.txt
922";
923
924        assert!(
925            result.contains(expected_tree_part),
926            "Expected tree structure not found in output:\nTree Section:\n---\n{}\n---",
927            result
928                .split("---\nFILE CONTENTS\n---")
929                .next()
930                .unwrap_or("TREE NOT FOUND")
931        );
932
933        assert!(
934            result.contains("\n---\nFILE CONTENTS\n---\n\n"),
935            "Expected file content separator not found"
936        );
937        // Check presence of headers and content for included files
938        assert!(
939            result.contains("--- FILE: file1.txt ---"),
940            "Header for file1.txt missing"
941        );
942        assert!(
943            result.contains("Content of file 1."),
944            "Content of file1.txt missing"
945        );
946        assert!(
947            result.contains("--- FILE: deep/sub/nested.txt ---"),
948            "Header for nested.txt missing"
949        );
950        assert!(
951            result.contains("Nested content"),
952            "Content of nested.txt missing"
953        );
954        // Check absence of excluded file content
955        assert!(
956            !result.contains("Previous dirgrab output."),
957            "dirgrab.txt content included unexpectedly"
958        );
959        assert!(
960            !result.contains("Log message"),
961            "Log content included unexpectedly"
962        );
963
964        Ok(())
965    }
966
967    #[test]
968    fn test_grab_contents_with_tree_git_mode() -> Result<()> {
969        let (_dir, path) = setup_test_dir()?;
970        if !setup_git_repo(&path)? {
971            println!("Skipping Git test: git not found or setup failed.");
972            return Ok(());
973        }
974        let config = GrabConfig {
975            target_path: path.clone(),
976            add_headers: true,
977            exclude_patterns: vec![".gitignore".to_string()], // Exclude .gitignore
978            include_untracked: true,                          // Include untracked
979            include_default_output: false,                    // Exclude dirgrab.txt (default)
980            no_git: false,                                    // Use Git
981            include_tree: true,                               // Include tree
982            convert_pdf: false,
983            all_repo: false,
984        };
985        let result = grab_contents(&config)?;
986
987        // Expected tree for git ls-files -ou --exclude-standard :!.gitignore :!dirgrab.txt
988        // Should include: file2.rs, another.txt, nested.txt, untracked.txt
989        let expected_tree_part = "\
990---
991DIRECTORY STRUCTURE
992---
993- deep/
994  - sub/
995    - nested.txt
996- file2.rs
997- subdir/
998  - another.txt
999- untracked.txt
1000";
1001        assert!(
1002            result.contains(expected_tree_part),
1003            "Expected tree structure not found in output:\nTree Section:\n---\n{}\n---",
1004            result
1005                .split("---\nFILE CONTENTS\n---")
1006                .next()
1007                .unwrap_or("TREE NOT FOUND")
1008        );
1009        assert!(
1010            result.contains("\n---\nFILE CONTENTS\n---\n\n"),
1011            "Separator missing"
1012        );
1013        // Check content
1014        assert!(
1015            result.contains("--- FILE: file2.rs ---"),
1016            "file2.rs header missing"
1017        );
1018        assert!(result.contains("fn main() {}"), "file2.rs content missing");
1019        assert!(
1020            result.contains("--- FILE: untracked.txt ---"),
1021            "untracked.txt header missing"
1022        );
1023        assert!(
1024            result.contains("This file is not tracked."),
1025            "untracked.txt content missing"
1026        );
1027        assert!(
1028            !result.contains("--- FILE: .gitignore ---"),
1029            ".gitignore included unexpectedly"
1030        );
1031
1032        Ok(())
1033    }
1034
1035    #[test]
1036    fn test_grab_contents_with_tree_empty() -> Result<()> {
1037        let (_dir, path) = setup_test_dir()?;
1038        // No need for files if we exclude everything
1039        let config = GrabConfig {
1040            target_path: path.clone(),
1041            add_headers: true,
1042            exclude_patterns: vec!["*".to_string(), "*/".to_string()], // Exclude everything
1043            include_untracked: true,
1044            include_default_output: true,
1045            no_git: true,       // Use walkdir
1046            include_tree: true, // Ask for tree
1047            convert_pdf: false,
1048            all_repo: false,
1049        };
1050        let result = grab_contents(&config)?;
1051        // Expect only the empty tree message
1052        let expected = "---\nDIRECTORY STRUCTURE (No files selected)\n---\n\n";
1053        assert_eq!(result, expected);
1054        Ok(())
1055    }
1056
1057    // Tests calling internal helpers need crate:: prefix
1058    #[test]
1059    fn test_generate_indented_tree_simple() -> Result<()> {
1060        let tmp_dir = tempdir()?;
1061        let proj_dir = tmp_dir.path().join("project");
1062        fs::create_dir_all(proj_dir.join("src"))?;
1063        fs::create_dir_all(proj_dir.join("tests"))?;
1064        fs::write(proj_dir.join("src/main.rs"), "")?;
1065        fs::write(proj_dir.join("README.md"), "")?;
1066        fs::write(proj_dir.join("src/lib.rs"), "")?;
1067        fs::write(proj_dir.join("tests/basic.rs"), "")?;
1068
1069        // Simulate paths relative to a base (doesn't have to exist for this test)
1070        let base = PathBuf::from("/project"); // Logical base
1071        let files_logical = [
1072            // Use array for BTreeSet later if needed
1073            base.join("src/main.rs"),
1074            base.join("README.md"),
1075            base.join("src/lib.rs"),
1076            base.join("tests/basic.rs"),
1077        ];
1078
1079        // Map logical paths to actual paths in temp dir for is_dir() check
1080        let files_in_tmp = files_logical
1081            .iter()
1082            .map(|p| tmp_dir.path().join(p.strip_prefix("/").unwrap()))
1083            .collect::<Vec<_>>();
1084        let base_in_tmp = tmp_dir.path().join("project"); // The actual base path
1085
1086        let tree = crate::tree::generate_indented_tree(&files_in_tmp, &base_in_tmp)?; // Use crate:: path
1087        let expected = "\
1088- README.md
1089- src/
1090  - lib.rs
1091  - main.rs
1092- tests/
1093  - basic.rs
1094";
1095        assert_eq!(tree, expected);
1096        Ok(())
1097    }
1098
1099    #[test]
1100    fn test_generate_indented_tree_deeper() -> Result<()> {
1101        let tmp_dir = tempdir()?;
1102        let proj_dir = tmp_dir.path().join("project");
1103        fs::create_dir_all(proj_dir.join("a/b/c"))?;
1104        fs::create_dir_all(proj_dir.join("a/d"))?;
1105        fs::write(proj_dir.join("a/b/c/file1.txt"), "")?;
1106        fs::write(proj_dir.join("a/d/file2.txt"), "")?;
1107        fs::write(proj_dir.join("top.txt"), "")?;
1108        fs::write(proj_dir.join("a/b/file3.txt"), "")?;
1109
1110        let base = PathBuf::from("/project"); // Logical base
1111        let files_logical = [
1112            base.join("a/b/c/file1.txt"),
1113            base.join("a/d/file2.txt"),
1114            base.join("top.txt"),
1115            base.join("a/b/file3.txt"),
1116        ];
1117
1118        let files_in_tmp = files_logical
1119            .iter()
1120            .map(|p| tmp_dir.path().join(p.strip_prefix("/").unwrap()))
1121            .collect::<Vec<_>>();
1122        let base_in_tmp = tmp_dir.path().join("project"); // Actual base
1123
1124        let tree = crate::tree::generate_indented_tree(&files_in_tmp, &base_in_tmp)?; // Use crate:: path
1125        let expected = "\
1126- a/
1127  - b/
1128    - c/
1129      - file1.txt
1130    - file3.txt
1131  - d/
1132    - file2.txt
1133- top.txt
1134";
1135        assert_eq!(tree, expected);
1136        Ok(())
1137    }
1138
1139    // --- Tests for processing.rs (Updated to pass GrabConfig) ---
1140    #[test]
1141    fn test_process_files_no_headers_skip_binary() -> Result<()> {
1142        let (_dir, path) = setup_test_dir()?;
1143        let files_to_process = vec![
1144            path.join("file1.txt"),
1145            path.join("binary.dat"), // Should be skipped as non-utf8
1146            path.join("file2.rs"),
1147        ];
1148        let config = GrabConfig {
1149            // Create dummy config
1150            target_path: path.clone(),
1151            add_headers: false, // Key part of this test
1152            exclude_patterns: vec![],
1153            include_untracked: false,
1154            include_default_output: false,
1155            no_git: true, // Assume non-git mode for simplicity here
1156            include_tree: false,
1157            convert_pdf: false, // PDF conversion off
1158            all_repo: false,
1159        };
1160        let result = crate::processing::process_files(&files_to_process, &config, None, &path)?; // Pass config
1161                                                                                                 // Expected content: file1, newline, newline, file2, newline, newline
1162        let expected_content = "Content of file 1.\n\nfn main() {}\n\n";
1163        assert_eq!(result, expected_content); // Compare exact expected string
1164        Ok(())
1165    }
1166
1167    #[test]
1168    fn test_process_files_with_headers_git_mode() -> Result<()> {
1169        let (_dir, path) = setup_test_dir()?;
1170        // Don't need full git setup if we just provide repo_root
1171        let files_to_process = vec![path.join("file1.txt"), path.join("file2.rs")];
1172        let repo_root = Some(path.as_path());
1173        let config = GrabConfig {
1174            target_path: path.clone(), // target can be same as root for this test
1175            add_headers: true,         // Key part of this test
1176            exclude_patterns: vec![],
1177            include_untracked: false,
1178            include_default_output: false,
1179            no_git: false, // Git mode ON
1180            include_tree: false,
1181            convert_pdf: false,
1182            all_repo: false,
1183        };
1184        let result =
1185            crate::processing::process_files(&files_to_process, &config, repo_root, &path)?;
1186        let expected_content = format!(
1187            "--- FILE: {} ---\nContent of file 1.\n\n--- FILE: {} ---\nfn main() {{}}\n\n",
1188            Path::new("file1.txt").display(), // Paths relative to repo_root (which is path)
1189            Path::new("file2.rs").display()
1190        );
1191        assert_eq!(result, expected_content);
1192        Ok(())
1193    }
1194
1195    #[test]
1196    fn test_process_files_headers_no_git_mode() -> Result<()> {
1197        let (_dir, path) = setup_test_dir()?;
1198        let files_to_process = vec![path.join("file1.txt"), path.join("subdir/another.txt")];
1199        let config = GrabConfig {
1200            target_path: path.clone(), // Target path is the base
1201            add_headers: true,         // Key part of this test
1202            exclude_patterns: vec![],
1203            include_untracked: false,
1204            include_default_output: false,
1205            no_git: true, // Git mode OFF
1206            include_tree: false,
1207            convert_pdf: false,
1208            all_repo: false,
1209        };
1210        let result = crate::processing::process_files(&files_to_process, &config, None, &path)?;
1211        let expected_content = format!(
1212            "--- FILE: {} ---\nContent of file 1.\n\n--- FILE: {} ---\nAnother text file.\n\n",
1213            Path::new("file1.txt").display(), // Paths relative to target_path
1214            Path::new("subdir/another.txt").display()
1215        );
1216        assert_eq!(result, expected_content);
1217        Ok(())
1218    }
1219
1220    #[test]
1221    fn test_grab_contents_with_pdf_conversion_enabled() -> Result<()> {
1222        let (_dir, path) = setup_test_dir()?;
1223        let base_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1224        let fixtures_dir = base_dir.join("tests/fixtures");
1225        fs::create_dir_all(&fixtures_dir)?;
1226        let fixture_pdf_src = fixtures_dir.join("sample.pdf");
1227
1228        if !fixture_pdf_src.exists() {
1229            anyhow::bail!("Fixture PDF not found at {:?}", fixture_pdf_src);
1230        }
1231
1232        let fixture_pdf_dest = path.join("sample.pdf");
1233        fs::copy(&fixture_pdf_src, &fixture_pdf_dest).with_context(|| {
1234            format!(
1235                "Failed to copy fixture PDF from {:?} to {:?}",
1236                fixture_pdf_src, fixture_pdf_dest
1237            )
1238        })?;
1239
1240        fs::write(path.join("normal.txt"), "Normal text content.")?;
1241
1242        let config = GrabConfig {
1243            target_path: path.clone(),
1244            add_headers: true,
1245            exclude_patterns: vec![
1246                "dirgrab.txt".into(),
1247                "*.log".into(),
1248                "*.dat".into(),
1249                "*.rs".into(),
1250                "subdir/".into(),
1251                ".gitignore".into(),
1252                "deep/".into(),
1253                "untracked.txt".into(),
1254            ],
1255            include_untracked: false,
1256            include_default_output: false,
1257            no_git: true,
1258            include_tree: false,
1259            convert_pdf: true,
1260            all_repo: false,
1261        };
1262
1263        let result_string = grab_contents(&config)?;
1264
1265        // Check PDF header
1266        let expected_pdf_header = "--- FILE: sample.pdf (extracted text) ---";
1267        assert!(
1268            result_string.contains(expected_pdf_header),
1269            "Missing or incorrect PDF header. Output:\n{}",
1270            result_string
1271        );
1272
1273        // *** Update expected content based on actual PDF text - try a different snippet ***
1274        // let expected_pdf_content = "Pines are the largest and most"; // Original snippet
1275        let expected_pdf_content = "Pinaceae family"; // Try this snippet instead
1276
1277        // Add a println to see exactly what is being searched for and in what
1278        println!("Searching for: '{}'", expected_pdf_content);
1279        println!("Within: '{}'", result_string);
1280
1281        assert!(
1282            result_string.contains(expected_pdf_content),
1283            "Missing extracted PDF content ('{}'). Output:\n{}",
1284            expected_pdf_content,
1285            result_string
1286        );
1287
1288        // Check normal text file header and content
1289        let expected_txt_header = "--- FILE: normal.txt ---";
1290        let expected_txt_content = "Normal text content.";
1291        assert!(
1292            result_string.contains(expected_txt_header),
1293            "Missing or incorrect TXT header. Output:\n{}",
1294            result_string
1295        );
1296        assert!(
1297            result_string.contains(expected_txt_content),
1298            "Missing TXT content. Output:\n{}",
1299            result_string
1300        );
1301
1302        // Check that file1.txt (not excluded) is present
1303        let expected_file1_header = "--- FILE: file1.txt ---";
1304        assert!(
1305            result_string.contains(expected_file1_header),
1306            "Missing file1.txt header. Output:\n{}",
1307            result_string
1308        );
1309
1310        Ok(())
1311    }
1312
1313    #[test]
1314    fn test_grab_contents_with_pdf_conversion_disabled() -> Result<()> {
1315        let (_dir, path) = setup_test_dir()?; // Use existing helper
1316        let base_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1317        let fixtures_dir = base_dir.join("tests/fixtures");
1318        fs::create_dir_all(&fixtures_dir)?; // Ensure exists
1319        let fixture_pdf_src = fixtures_dir.join("sample.pdf");
1320
1321        // Create dummy if needed
1322        if !fixture_pdf_src.exists() {
1323            let basic_pdf_content = "%PDF-1.4\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n2 0 obj<</Type/Pages/Count 1/Kids[3 0 R]>>endobj\n3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Contents 4 0 R/Resources<<>>>>endobj\n4 0 obj<</Length 52>>stream\nBT /F1 12 Tf 72 712 Td (This is sample PDF text content.) Tj ET\nendstream\nendobj\nxref\n0 5\n0000000000 65535 f \n0000000010 00000 n \n0000000063 00000 n \n0000000117 00000 n \n0000000198 00000 n \ntrailer<</Size 5/Root 1 0 R>>\nstartxref\n315\n%%EOF";
1324            fs::write(&fixture_pdf_src, basic_pdf_content)?;
1325            println!(
1326                "Created dummy sample.pdf for testing at {:?}",
1327                fixture_pdf_src
1328            );
1329        }
1330
1331        let fixture_pdf_dest = path.join("sample.pdf");
1332        fs::copy(&fixture_pdf_src, &fixture_pdf_dest).with_context(|| {
1333            format!(
1334                "Failed to copy fixture PDF from {:?} to {:?}",
1335                fixture_pdf_src, fixture_pdf_dest
1336            )
1337        })?;
1338        fs::write(path.join("normal.txt"), "Normal text content.")?;
1339
1340        let config = GrabConfig {
1341            target_path: path.clone(),
1342            add_headers: true,
1343            // Exclude many things to simplify output check
1344            exclude_patterns: vec![
1345                "dirgrab.txt".into(),
1346                "*.log".into(),
1347                "*.dat".into(),
1348                "*.rs".into(),
1349                "subdir/".into(),
1350                ".gitignore".into(),
1351                "deep/".into(),
1352                "untracked.txt".into(),
1353            ],
1354            include_untracked: false,
1355            include_default_output: false,
1356            no_git: true,
1357            include_tree: false,
1358            convert_pdf: false, // Disable PDF conversion
1359            all_repo: false,
1360        };
1361
1362        let result_string = grab_contents(&config)?;
1363
1364        // Check PDF is NOT processed as text
1365        let unexpected_pdf_header_part = "(extracted text)"; // Check for the specific part of the header
1366        let unexpected_pdf_content = "This is sample PDF text content.";
1367        assert!(
1368            !result_string.contains(unexpected_pdf_header_part),
1369            "PDF extracted text header part present unexpectedly. Output:\n{}",
1370            result_string
1371        );
1372        assert!(
1373            !result_string.contains(unexpected_pdf_content),
1374            "Extracted PDF content present unexpectedly. Output:\n{}",
1375            result_string
1376        );
1377
1378        // Check normal text file is still included
1379        let expected_txt_header = "--- FILE: normal.txt ---";
1380        let expected_txt_content = "Normal text content.";
1381        assert!(
1382            result_string.contains(expected_txt_header),
1383            "Missing or incorrect TXT header. Output:\n{}",
1384            result_string
1385        );
1386        assert!(
1387            result_string.contains(expected_txt_content),
1388            "Missing TXT content. Output:\n{}",
1389            result_string
1390        );
1391
1392        // Check that file1.txt (not excluded) is present
1393        let expected_file1_header = "--- FILE: file1.txt ---";
1394        assert!(
1395            result_string.contains(expected_file1_header),
1396            "Missing file1.txt header. Output:\n{}",
1397            result_string
1398        );
1399
1400        // With convert_pdf: false, the PDF should be skipped as non-UTF8 by the fallback logic.
1401        // Check that the standard PDF header does NOT appear either.
1402        let regular_pdf_header = "--- FILE: sample.pdf ---";
1403        assert!(
1404            !result_string.contains(regular_pdf_header),
1405            "Regular PDF header present when it should have been skipped as non-utf8. Output:\n{}",
1406            result_string
1407        );
1408
1409        Ok(())
1410    }
1411} // End of mod tests