dumpfiles/
lib.rs

1extern crate anyhow;
2extern crate ignore;
3extern crate tempfile;
4extern crate tiktoken_rs;
5
6use anyhow::Context;
7use ignore::{overrides::OverrideBuilder, WalkBuilder};
8use std::io::{BufWriter, Write};
9use std::path::Path;
10use tempfile::NamedTempFile;
11use tiktoken_rs::o200k_base;
12
/// Selects which ignore files are honored while walking the directory tree.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GitignoreMode {
    /// Discover `.ignore`, `.gitignore`, global and repository exclude files automatically.
    Auto,
    /// Turn automatic discovery off and apply only the ignore file at this path.
    Path(std::path::PathBuf),
    /// Apply no `.ignore`/gitignore rules at all.
    Disabled,
}
18
19pub fn write_directory_contents_yaml(
20    directory: &Path,
21    output: &Path,
22    ignore_patterns: &[String],
23    gitignore_mode: GitignoreMode,
24    dumpignore_path: Option<&Path>,
25) -> anyhow::Result<()> {
26    // Load BPE tokenizer
27    let bpe = o200k_base().context("Failed to load BPE tokenizer")?;
28
29    // Canonicalize the directory for absolute paths.
30    let absolute_directory = directory
31        .canonicalize()
32        .context("Failed to get absolute path")?;
33
34    // Set up the walker with ignore patterns and .gitignore/.dumpignore.
35    let mut walker = WalkBuilder::new(&absolute_directory);
36    if let Some(dumpignore) = dumpignore_path {
37        walker.add_custom_ignore_filename(dumpignore);
38    }
39
40    // Create override builder for ignore patterns
41    if !ignore_patterns.is_empty() {
42        let mut override_builder = OverrideBuilder::new(&absolute_directory);
43        for pattern in ignore_patterns {
44            // Add patterns with ! prefix to ignore them (gitignore semantics)
45            let ignore_pattern = if pattern.starts_with('!') {
46                pattern.to_string()
47            } else {
48                format!("!{}", pattern)
49            };
50            if let Err(err) = override_builder.add(&ignore_pattern) {
51                log::warn!("Failed to add ignore pattern '{}': {}", pattern, err);
52            }
53        }
54        if let Ok(overrides) = override_builder.build() {
55            walker.overrides(overrides);
56        }
57    }
58
59    // Gitignore behavior per the desired semantics
60    match gitignore_mode {
61        GitignoreMode::Auto => {
62            // Default discovery ON (this is WalkBuilder’s default, but be explicit)
63            walker.require_git(false); // do not require a .git directory
64            walker.ignore(true); // .ignore
65            walker.git_ignore(true); // discovered .gitignore
66            walker.git_global(true); // global excludes
67            walker.git_exclude(true); // core excludes
68        }
69        GitignoreMode::Path(ref p) => {
70            // Use only the provided file; disable discovery
71            walker.require_git(false);
72            walker.ignore(false);
73            walker.git_ignore(false);
74            walker.git_global(false);
75            walker.git_exclude(false);
76            walker.add_ignore(p);
77        }
78        GitignoreMode::Disabled => {
79            // Disable everything
80            walker.require_git(false);
81            walker.ignore(false);
82            walker.git_ignore(false);
83            walker.git_global(false);
84            walker.git_exclude(false);
85        }
86    }
87
88    walker.follow_links(false);
89
90    // Always ignore the output file.
91    let output_abs = if output.is_absolute() {
92        output.to_path_buf()
93    } else {
94        absolute_directory.join(output)
95    };
96
97    // Canonicalize to match walker's canonicalized entries, but fall back if it doesn't exist yet.
98    let output_abs = output_abs.canonicalize().unwrap_or(output_abs);
99
100    walker.filter_entry(move |entry| entry.path() != output_abs);
101
102    let walker_iter = walker.build();
103
104    // Temporary file to write output before moving to final destination
105    let tmp = NamedTempFile::new().context("Failed to create temporary file")?;
106    let mut writer = BufWriter::new(tmp.reopen()?);
107
108    // Write the YAML header. Here, the project name is taken from the directory's final component.
109    let project_name = absolute_directory
110        .file_name()
111        .map_or("project", |s| s.to_str().unwrap());
112    writeln!(writer, "project: {}", project_name)?;
113    writeln!(writer, "files:")?;
114
115    // Track statistics for logging
116    let mut file_count = 0;
117    let mut total_characters = 0;
118    let mut total_tokens = 0;
119
120    // Iterate over the walker and write each file as a flat YAML entry.
121    for entry in walker_iter {
122        let entry = entry.context("Failed to read directory entry")?;
123        if entry.file_type().is_some_and(|ft| ft.is_file()) {
124            // Get the file's path relative to the directory.
125            let relative_path = entry
126                .path()
127                .strip_prefix(&absolute_directory)
128                .context("Failed to get relative path")?;
129            let relative_path_str = relative_path.to_string_lossy();
130
131            // Retrieve file metadata to compute its size.
132            let metadata = entry.metadata().context("Failed to get metadata")?;
133            let size_bytes = metadata.len();
134            let size_str = if size_bytes < 1024 {
135                format!("{} B", size_bytes)
136            } else {
137                let kb = size_bytes as f64 / 1024.0;
138                format!("{:.1} KB", kb)
139            };
140
141            let (lines, tokens, content) = match std::fs::read_to_string(entry.path()) {
142                Ok(text) => {
143                    let line_count = text.lines().count();
144                    let tokens = bpe.encode_with_special_tokens(&text);
145                    (line_count, tokens.len(), text)
146                }
147                Err(_) => (
148                    0,
149                    0,
150                    format!("Binary or inaccessible file: {}", entry.path().display()),
151                ),
152            };
153
154            // Update statistics
155            file_count += 1;
156            total_characters += content.chars().count();
157            total_tokens += tokens;
158
159            // Write the YAML mapping for this file.
160            writeln!(writer, "  - path: {:?}", relative_path_str)?;
161            writeln!(writer, "    size: \"{}\"", size_str)?;
162            writeln!(writer, "    lines: {}", lines)?;
163            writeln!(writer, "    tokens: {}", tokens)?;
164            writeln!(writer, "    content: |")?;
165            // Indent each line of content with six spaces.
166            for line in content.lines() {
167                writeln!(writer, "      {}", line)?;
168            }
169        }
170    }
171
172    // Rename temporary file to the output location
173    if let Some(parent) = output.parent() {
174        std::fs::create_dir_all(parent).ok();
175    }
176    tmp.persist(output).map_err(|e| anyhow::anyhow!(e.error))?;
177
178    writer.flush()?;
179
180    // Log summary statistics
181    log::info!(
182        "Processed {} files with {} total characters and {} total tokens.",
183        file_count,
184        total_characters,
185        total_tokens
186    );
187
188    Ok(())
189}