1use crate::compress::{compress_source, language_for_path, CompressResult};
2use crate::config::Config;
3use crate::filters::{
4 exceeds_size_limit, is_binary_content, is_binary_extension, is_secret_file, SkipReason,
5};
6use crate::output::{OutputWriter, Statistics};
7use crate::priority::score_file;
8use crate::tokens::{is_prose_extension, Tokenizer};
9use anyhow::{Context, Result};
10use ignore::WalkBuilder;
11use std::fs;
12use std::io::Write;
13use std::path::{Path, PathBuf};
14
/// A file that survived filtering and has been read into memory so it can be
/// ranked and fitted into the token budget.
#[derive(Debug)]
struct FileCandidate {
    /// Location of the file, exactly as collected by the directory walk.
    path: PathBuf,
    /// Complete file contents as returned by `fs::read_to_string`.
    content: String,
    /// Priority produced by `score_file`; candidates are processed in
    /// descending score order.
    score: u32,
    /// Result of `is_prose_extension` for the file's extension; forwarded to
    /// the tokenizer when counting tokens.
    is_prose: bool,
}
22
/// Outcome of budget planning for a single candidate file.
#[derive(Debug, PartialEq, Eq)]
enum FileDecision {
    /// Emit the attached text as the file's full content.
    IncludeFull(String),
    /// Emit the attached text, which is the compressed rendition of the file.
    IncludeCompressed(String),
    /// Leave the file out of the output.
    Excluded,
}
29
30pub fn walk_and_flatten(config: &Config) -> Result<Statistics> {
31 let mut stats = Statistics::new();
32 let tokenizer = Tokenizer::new(config.tokenizer.clone());
33
34 let mut builder = WalkBuilder::new(&config.path);
36 builder.standard_filters(true);
37
38 if let Some(ref gitignore_path) = config.gitignore_path {
39 builder.add_custom_ignore_filename(gitignore_path);
40 }
41
42 let writer: Box<dyn Write> = match &config.output_file {
44 Some(path) => Box::new(
45 fs::File::create(path)
46 .with_context(|| format!("Failed to create output file: {}", path.display()))?,
47 ),
48 None => Box::new(std::io::stdout()),
49 };
50
51 let mut output = OutputWriter::new(writer);
52
53 let mut files_to_process = Vec::new();
55
56 for result in builder.build() {
57 match result {
58 Ok(entry) => {
59 let path = entry.path();
60
61 if path.is_dir() {
62 continue;
63 }
64
65 if let Some(reason) = should_skip(path, config) {
66 stats.add_skipped(reason.clone());
67 if !config.stats_only {
68 eprintln!("Skipping {}: {}", path.display(), reason);
69 }
70 continue;
71 }
72
73 files_to_process.push(path.to_path_buf());
74 let extension = path.extension().and_then(|e| e.to_str());
75 stats.add_included(extension);
76 }
77 Err(e) => {
78 eprintln!("Error walking directory: {}", e);
79 stats.add_skipped(SkipReason::ReadError);
80 }
81 }
82 }
83
84 files_to_process.sort();
86
87 if let Some(budget) = config.token_budget {
89 stats.token_budget = Some(budget);
90 write_with_budget(config, &files_to_process, &mut output, &mut stats, budget, &tokenizer)?;
91 } else if config.stats_only {
92 for path in &files_to_process {
93 let path_str = path.display().to_string();
94 if config.compress {
95 let file_name = path
96 .file_name()
97 .map(|f| f.to_string_lossy().to_string())
98 .unwrap_or_default();
99 let is_full = config.is_full_match(&file_name);
100 if !is_full {
101 if let Some(lang) = language_for_path(path) {
102 if let Ok(content) = fs::read_to_string(path) {
103 match compress_source(&content, lang) {
104 CompressResult::Compressed(compressed) => {
105 stats.add_file_size_estimate(
106 compressed.len() as u64,
107 path_str.len(),
108 path.extension().and_then(|e| e.to_str()),
109 );
110 stats.add_compressed();
111 continue;
112 }
113 CompressResult::Fallback(original, _) => {
114 stats.add_file_size_estimate(
115 original.len() as u64,
116 path_str.len(),
117 path.extension().and_then(|e| e.to_str()),
118 );
119 continue;
120 }
121 }
122 }
123 }
124 }
125 }
126 if let Ok(metadata) = fs::metadata(path) {
128 stats.add_file_size_estimate(
129 metadata.len(),
130 path_str.len(),
131 path.extension().and_then(|e| e.to_str()),
132 );
133 }
134 }
135 eprintln!("{}", stats.format_summary());
136 } else if config.dry_run {
137 for path in &files_to_process {
138 output.write_file_path(&path.display().to_string())?;
139 }
140 stats.add_output_bytes(output.bytes_written());
141 output.write_summary(&stats)?;
142 } else {
143 write_normal(config, &files_to_process, &mut output, &mut stats)?;
144 }
145
146 Ok(stats)
147}
148
149fn write_with_budget(
151 config: &Config,
152 files: &[PathBuf],
153 output: &mut OutputWriter,
154 stats: &mut Statistics,
155 budget: usize,
156 tokenizer: &Tokenizer,
157) -> Result<()> {
158 let base_path = &config.path;
159
160 let mut candidates: Vec<FileCandidate> = Vec::new();
162 for path in files {
163 match fs::read_to_string(path) {
164 Ok(content) => {
165 let score = score_file(path, base_path);
166 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
167 let is_prose = is_prose_extension(ext);
168 candidates.push(FileCandidate {
169 path: path.clone(),
170 content,
171 score,
172 is_prose,
173 });
174 }
175 Err(e) => {
176 eprintln!("Error reading {}: {}", path.display(), e);
177 }
178 }
179 }
180
181 candidates.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
183
184 let mut remaining_budget = budget;
185
186 let mut decisions: Vec<(&FileCandidate, FileDecision)> = Vec::new();
188
189 for candidate in &candidates {
190 let display_path = candidate.path.display().to_string();
191 let file_name = candidate
192 .path
193 .file_name()
194 .map(|f| f.to_string_lossy().to_string())
195 .unwrap_or_default();
196 let full_tokens = tokenizer.count_tokens(&candidate.content, candidate.is_prose);
197
198 if config.compress && config.is_full_match(&file_name) {
199 if full_tokens <= remaining_budget {
201 remaining_budget -= full_tokens;
202 stats.tokens_used += full_tokens;
203 decisions.push((
204 candidate,
205 FileDecision::IncludeFull(candidate.content.clone()),
206 ));
207 } else {
208 stats.excluded_by_budget.push(display_path);
209 decisions.push((candidate, FileDecision::Excluded));
210 }
211 } else if full_tokens <= remaining_budget {
212 remaining_budget -= full_tokens;
214 stats.tokens_used += full_tokens;
215 if config.compress {
216 let content = maybe_compress(config, &candidate.path, &candidate.content, stats);
218 decisions.push((candidate, content));
219 } else {
220 decisions.push((
221 candidate,
222 FileDecision::IncludeFull(candidate.content.clone()),
223 ));
224 }
225 } else if config.compress {
226 if let Some(lang) = language_for_path(&candidate.path) {
228 match compress_source(&candidate.content, lang) {
229 CompressResult::Compressed(compressed) => {
230 let compressed_tokens = tokenizer.count_tokens(&compressed, candidate.is_prose);
231 if compressed_tokens <= remaining_budget {
232 remaining_budget -= compressed_tokens;
233 stats.tokens_used += compressed_tokens;
234 stats.add_compressed();
235 decisions
236 .push((candidate, FileDecision::IncludeCompressed(compressed)));
237 } else {
238 stats.excluded_by_budget.push(display_path);
239 decisions.push((candidate, FileDecision::Excluded));
240 }
241 }
242 CompressResult::Fallback(original, reason) => {
243 if let Some(reason) = &reason {
244 eprintln!(
245 "Warning: compression failed for {}: {}, including full content",
246 display_path, reason
247 );
248 }
249 let fallback_tokens = tokenizer.count_tokens(&original, candidate.is_prose);
251 if fallback_tokens <= remaining_budget {
252 remaining_budget -= fallback_tokens;
253 stats.tokens_used += fallback_tokens;
254 decisions.push((candidate, FileDecision::IncludeFull(original)));
255 } else {
256 stats.excluded_by_budget.push(display_path);
257 decisions.push((candidate, FileDecision::Excluded));
258 }
259 }
260 }
261 } else {
262 stats.excluded_by_budget.push(display_path);
264 decisions.push((candidate, FileDecision::Excluded));
265 }
266 } else {
267 stats.excluded_by_budget.push(display_path);
269 decisions.push((candidate, FileDecision::Excluded));
270 }
271 }
272
273 if config.stats_only {
275 for (candidate, decision) in &decisions {
276 match decision {
277 FileDecision::IncludeFull(content) | FileDecision::IncludeCompressed(content) => {
278 let path_str = candidate.path.display().to_string();
279 stats.add_file_size_estimate(
280 content.len() as u64,
281 path_str.len(),
282 candidate.path.extension().and_then(|e| e.to_str()),
283 );
284 }
285 FileDecision::Excluded => {}
286 }
287 }
288 eprintln!("{}", stats.format_summary());
289 } else if config.dry_run {
290 for (candidate, decision) in &decisions {
291 let display_path = candidate.path.display().to_string();
292 let annotation = match decision {
293 FileDecision::IncludeFull(_) => "[FULL]",
294 FileDecision::IncludeCompressed(_) => "[COMPRESSED]",
295 FileDecision::Excluded => "[EXCLUDED]",
296 };
297 output.write_file_path(&format!("{} {}", display_path, annotation))?;
298 }
299 stats.add_output_bytes(output.bytes_written());
300 output.write_summary(stats)?;
301 } else {
302 for (candidate, decision) in &decisions {
303 let display_path = candidate.path.display().to_string();
304 match decision {
305 FileDecision::IncludeFull(content) => {
306 let mode = if config.compress { Some("full") } else { None };
307 output.write_file_content_with_mode(&display_path, content, mode)?;
308 }
309 FileDecision::IncludeCompressed(content) => {
310 output.write_file_content_with_mode(
311 &display_path,
312 content,
313 Some("compressed"),
314 )?;
315 }
316 FileDecision::Excluded => {}
317 }
318 }
319 stats.add_output_bytes(output.bytes_written());
320 output.write_summary(stats)?;
321 }
322
323 Ok(())
324}
325
326fn write_normal(
328 config: &Config,
329 files: &[PathBuf],
330 output: &mut OutputWriter,
331 stats: &mut Statistics,
332) -> Result<()> {
333 for path in files {
334 match fs::read_to_string(path) {
335 Ok(content) => {
336 let display_path = path.display().to_string();
337
338 if config.compress {
339 let file_name = path
340 .file_name()
341 .map(|f| f.to_string_lossy().to_string())
342 .unwrap_or_default();
343 let is_full = config.is_full_match(&file_name);
344
345 if is_full {
346 output.write_file_content_with_mode(
347 &display_path,
348 &content,
349 Some("full"),
350 )?;
351 } else if let Some(lang) = language_for_path(path) {
352 match compress_source(&content, lang) {
353 CompressResult::Compressed(compressed) => {
354 output.write_file_content_with_mode(
355 &display_path,
356 &compressed,
357 Some("compressed"),
358 )?;
359 stats.add_compressed();
360 }
361 CompressResult::Fallback(original, reason) => {
362 if let Some(reason) = reason {
363 eprintln!(
364 "Warning: compression failed for {}: {}, including full content",
365 display_path, reason
366 );
367 }
368 output.write_file_content_with_mode(
369 &display_path,
370 &original,
371 Some("full"),
372 )?;
373 }
374 }
375 } else {
376 output.write_file_content_with_mode(
377 &display_path,
378 &content,
379 Some("full"),
380 )?;
381 }
382 } else {
383 output.write_file_content(&display_path, &content)?;
384 }
385 }
386 Err(e) => {
387 eprintln!("Error reading {}: {}", path.display(), e);
388 }
389 }
390 }
391
392 stats.add_output_bytes(output.bytes_written());
393 output.write_summary(stats)?;
394 Ok(())
395}
396
397fn maybe_compress(
399 config: &Config,
400 path: &Path,
401 content: &str,
402 stats: &mut Statistics,
403) -> FileDecision {
404 let file_name = path
405 .file_name()
406 .map(|f| f.to_string_lossy().to_string())
407 .unwrap_or_default();
408
409 if config.is_full_match(&file_name) {
410 return FileDecision::IncludeFull(content.to_string());
411 }
412
413 if let Some(lang) = language_for_path(path) {
414 match compress_source(content, lang) {
415 CompressResult::Compressed(compressed) => {
416 stats.add_compressed();
417 FileDecision::IncludeCompressed(compressed)
418 }
419 CompressResult::Fallback(original, reason) => {
420 if let Some(reason) = reason {
421 eprintln!(
422 "Warning: compression failed for {}: {}, including full content",
423 path.display(),
424 reason
425 );
426 }
427 FileDecision::IncludeFull(original)
428 }
429 }
430 } else {
431 FileDecision::IncludeFull(content.to_string())
432 }
433}
434
435fn should_skip(path: &Path, config: &Config) -> Option<SkipReason> {
437 if let Some(file_name) = path.file_name() {
438 if !config.should_include_by_match(&file_name.to_string_lossy()) {
439 return Some(SkipReason::Match);
440 }
441 }
442
443 if is_secret_file(path) {
444 return Some(SkipReason::Secret);
445 }
446
447 if let Some(ext) = path.extension() {
448 let ext_str = ext.to_string_lossy();
449 if !config.should_include_extension(&ext_str) {
450 return Some(SkipReason::Extension);
451 }
452
453 if is_binary_extension(path) {
454 return Some(SkipReason::Binary);
455 }
456 }
457
458 if exceeds_size_limit(path, config.max_file_size) {
459 return Some(SkipReason::TooLarge);
460 }
461
462 if is_binary_content(path) {
463 return Some(SkipReason::Binary);
464 }
465
466 None
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `should_skip` yields the expected verdict for each listed path.
    fn assert_verdicts(config: &Config, cases: &[(&str, Option<SkipReason>)]) {
        for (name, expected) in cases {
            assert_eq!(
                &should_skip(Path::new(name), config),
                expected,
                "unexpected verdict for {}",
                name
            );
        }
    }

    #[test]
    fn test_should_skip_secret() {
        assert_verdicts(
            &Config::default(),
            &[
                (".env", Some(SkipReason::Secret)),
                ("credentials.json", Some(SkipReason::Secret)),
            ],
        );
    }

    #[test]
    fn test_should_skip_binary_extension() {
        assert_verdicts(
            &Config::default(),
            &[
                ("image.png", Some(SkipReason::Binary)),
                ("binary.exe", Some(SkipReason::Binary)),
            ],
        );
    }

    #[test]
    fn test_should_skip_extension_filter() {
        // Only `.rs` files are allowed through the extension filter.
        let config = Config {
            include_extensions: Some(vec!["rs".to_string()]),
            ..Default::default()
        };

        assert_verdicts(
            &config,
            &[
                ("file.json", Some(SkipReason::Extension)),
                ("file.rs", None),
            ],
        );
    }

    #[test]
    fn test_should_skip_match_filter() {
        // Only names matching the glob are kept.
        let matcher = globset::Glob::new("*_test.go").unwrap().compile_matcher();
        let config = Config {
            match_patterns: Some(vec![matcher]),
            ..Default::default()
        };

        assert_verdicts(
            &config,
            &[
                ("main.go", Some(SkipReason::Match)),
                ("user_test.go", None),
            ],
        );
    }
}