1extern crate anyhow;
2extern crate ignore;
3extern crate tempfile;
4extern crate tiktoken_rs;
5
6use anyhow::Context;
7use ignore::{overrides::OverrideBuilder, WalkBuilder};
8use std::io::{BufWriter, Write};
9use std::path::Path;
10use tempfile::NamedTempFile;
11use tiktoken_rs::o200k_base;
12
/// Selects which ignore files are honored while walking the directory.
///
/// The standard derives are provided so callers can log, copy and compare
/// modes (for example in tests or when echoing parsed command-line options).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GitignoreMode {
    /// Honor the standard ignore sources: `.ignore` files, `.gitignore`
    /// files, the global gitignore and `.git/info/exclude`.
    Auto,
    /// Turn the standard ignore sources off and apply only the rules read
    /// from the ignore file at this path.
    Path(std::path::PathBuf),
    /// Turn all of the standard ignore sources off.
    Disabled,
}
18
19pub fn write_directory_contents_yaml(
20 directory: &Path,
21 output: &Path,
22 ignore_patterns: &[String],
23 gitignore_mode: GitignoreMode,
24 dumpignore_path: Option<&Path>,
25) -> anyhow::Result<()> {
26 let bpe = o200k_base().context("Failed to load BPE tokenizer")?;
28
29 let absolute_directory = directory
31 .canonicalize()
32 .context("Failed to get absolute path")?;
33
34 let mut walker = WalkBuilder::new(&absolute_directory);
36 if let Some(dumpignore) = dumpignore_path {
37 walker.add_custom_ignore_filename(dumpignore);
38 }
39
40 if !ignore_patterns.is_empty() {
42 let mut override_builder = OverrideBuilder::new(&absolute_directory);
43 for pattern in ignore_patterns {
44 let ignore_pattern = if pattern.starts_with('!') {
46 pattern.to_string()
47 } else {
48 format!("!{}", pattern)
49 };
50 if let Err(err) = override_builder.add(&ignore_pattern) {
51 log::warn!("Failed to add ignore pattern '{}': {}", pattern, err);
52 }
53 }
54 if let Ok(overrides) = override_builder.build() {
55 walker.overrides(overrides);
56 }
57 }
58
59 match gitignore_mode {
61 GitignoreMode::Auto => {
62 walker.require_git(false); walker.ignore(true); walker.git_ignore(true); walker.git_global(true); walker.git_exclude(true); }
69 GitignoreMode::Path(ref p) => {
70 walker.require_git(false);
72 walker.ignore(false);
73 walker.git_ignore(false);
74 walker.git_global(false);
75 walker.git_exclude(false);
76 walker.add_ignore(p);
77 }
78 GitignoreMode::Disabled => {
79 walker.require_git(false);
81 walker.ignore(false);
82 walker.git_ignore(false);
83 walker.git_global(false);
84 walker.git_exclude(false);
85 }
86 }
87
88 walker.follow_links(false);
89
90 let output_abs = if output.is_absolute() {
92 output.to_path_buf()
93 } else {
94 absolute_directory.join(output)
95 };
96
97 let output_abs = output_abs.canonicalize().unwrap_or(output_abs);
99
100 walker.filter_entry(move |entry| entry.path() != output_abs);
101
102 let walker_iter = walker.build();
103
104 let tmp = NamedTempFile::new().context("Failed to create temporary file")?;
106 let mut writer = BufWriter::new(tmp.reopen()?);
107
108 let project_name = absolute_directory
110 .file_name()
111 .map_or("project", |s| s.to_str().unwrap());
112 writeln!(writer, "project: {}", project_name)?;
113 writeln!(writer, "files:")?;
114
115 let mut file_count = 0;
117 let mut total_characters = 0;
118 let mut total_tokens = 0;
119
120 for entry in walker_iter {
122 let entry = entry.context("Failed to read directory entry")?;
123 if entry.file_type().is_some_and(|ft| ft.is_file()) {
124 let relative_path = entry
126 .path()
127 .strip_prefix(&absolute_directory)
128 .context("Failed to get relative path")?;
129 let relative_path_str = relative_path.to_string_lossy();
130
131 let metadata = entry.metadata().context("Failed to get metadata")?;
133 let size_bytes = metadata.len();
134 let size_str = if size_bytes < 1024 {
135 format!("{} B", size_bytes)
136 } else {
137 let kb = size_bytes as f64 / 1024.0;
138 format!("{:.1} KB", kb)
139 };
140
141 let (lines, tokens, content) = match std::fs::read_to_string(entry.path()) {
142 Ok(text) => {
143 let line_count = text.lines().count();
144 let tokens = bpe.encode_with_special_tokens(&text);
145 (line_count, tokens.len(), text)
146 }
147 Err(_) => (
148 0,
149 0,
150 format!("Binary or inaccessible file: {}", entry.path().display()),
151 ),
152 };
153
154 file_count += 1;
156 total_characters += content.chars().count();
157 total_tokens += tokens;
158
159 writeln!(writer, " - path: {:?}", relative_path_str)?;
161 writeln!(writer, " size: \"{}\"", size_str)?;
162 writeln!(writer, " lines: {}", lines)?;
163 writeln!(writer, " tokens: {}", tokens)?;
164 writeln!(writer, " content: |")?;
165 for line in content.lines() {
167 writeln!(writer, " {}", line)?;
168 }
169 }
170 }
171
172 if let Some(parent) = output.parent() {
174 std::fs::create_dir_all(parent).ok();
175 }
176 tmp.persist(output).map_err(|e| anyhow::anyhow!(e.error))?;
177
178 writer.flush()?;
179
180 log::info!(
182 "Processed {} files with {} total characters and {} total tokens.",
183 file_count,
184 total_characters,
185 total_tokens
186 );
187
188 Ok(())
189}