Skip to main content

flat/
walker.rs

1use crate::compress::{compress_source, language_for_path, CompressResult};
2use crate::config::Config;
3use crate::filters::{
4    exceeds_size_limit, is_binary_content, is_binary_extension, is_secret_file, SkipReason,
5};
6use crate::output::{OutputWriter, Statistics};
7use crate::priority::score_file;
8use crate::tokens::{is_prose_extension, Tokenizer};
9use anyhow::{Context, Result};
10use ignore::WalkBuilder;
11use std::fs;
12use std::io::Write;
13use std::path::{Path, PathBuf};
14
/// A file candidate with its content and metadata for budget allocation.
///
/// `Debug` is derived so candidates can be inspected in logs and assertion messages.
#[derive(Debug)]
struct FileCandidate {
    /// Path of the file as produced by the directory walk.
    path: PathBuf,
    /// Full text of the file, read as UTF-8.
    content: String,
    /// Priority score; candidates are ordered by descending score before allocation.
    score: u32,
    /// Whether the file's extension was classified as prose (passed on to the tokenizer).
    is_prose: bool,
}
22
/// Result of budget allocation for a single file.
///
/// The common comparison/printing traits are derived so decisions can be asserted on and
/// logged.
#[derive(Debug, Clone, PartialEq, Eq)]
enum FileDecision {
    /// Emit the carried text as the file's full content.
    IncludeFull(String),
    /// Emit the carried text as the file's compressed content.
    IncludeCompressed(String),
    /// Leave the file out of the output.
    Excluded,
}
29
30pub fn walk_and_flatten(config: &Config) -> Result<Statistics> {
31    let mut stats = Statistics::new();
32    let tokenizer = Tokenizer::new(config.tokenizer.clone());
33
34    // Build the walker with gitignore support
35    let mut builder = WalkBuilder::new(&config.path);
36    builder.standard_filters(true);
37
38    if let Some(ref gitignore_path) = config.gitignore_path {
39        builder.add_custom_ignore_filename(gitignore_path);
40    }
41
42    // Create output writer
43    let writer: Box<dyn Write> = match &config.output_file {
44        Some(path) => Box::new(
45            fs::File::create(path)
46                .with_context(|| format!("Failed to create output file: {}", path.display()))?,
47        ),
48        None => Box::new(std::io::stdout()),
49    };
50
51    let mut output = OutputWriter::new(writer);
52
53    // First pass: collect all files
54    let mut files_to_process = Vec::new();
55
56    for result in builder.build() {
57        match result {
58            Ok(entry) => {
59                let path = entry.path();
60
61                if path.is_dir() {
62                    continue;
63                }
64
65                if let Some(reason) = should_skip(path, config) {
66                    stats.add_skipped(reason.clone());
67                    if !config.stats_only {
68                        eprintln!("Skipping {}: {}", path.display(), reason);
69                    }
70                    continue;
71                }
72
73                files_to_process.push(path.to_path_buf());
74                let extension = path.extension().and_then(|e| e.to_str());
75                stats.add_included(extension);
76            }
77            Err(e) => {
78                eprintln!("Error walking directory: {}", e);
79                stats.add_skipped(SkipReason::ReadError);
80            }
81        }
82    }
83
84    // Sort files by path for deterministic output
85    files_to_process.sort();
86
87    // Handle token budget mode
88    if let Some(budget) = config.token_budget {
89        stats.token_budget = Some(budget);
90        write_with_budget(config, &files_to_process, &mut output, &mut stats, budget, &tokenizer)?;
91    } else if config.stats_only {
92        for path in &files_to_process {
93            let path_str = path.display().to_string();
94            if config.compress {
95                let file_name = path
96                    .file_name()
97                    .map(|f| f.to_string_lossy().to_string())
98                    .unwrap_or_default();
99                let is_full = config.is_full_match(&file_name);
100                if !is_full {
101                    if let Some(lang) = language_for_path(path) {
102                        if let Ok(content) = fs::read_to_string(path) {
103                            match compress_source(&content, lang) {
104                                CompressResult::Compressed(compressed) => {
105                                    stats.add_file_size_estimate(
106                                        compressed.len() as u64,
107                                        path_str.len(),
108                                        path.extension().and_then(|e| e.to_str()),
109                                    );
110                                    stats.add_compressed();
111                                    continue;
112                                }
113                                CompressResult::Fallback(original, _) => {
114                                    stats.add_file_size_estimate(
115                                        original.len() as u64,
116                                        path_str.len(),
117                                        path.extension().and_then(|e| e.to_str()),
118                                    );
119                                    continue;
120                                }
121                            }
122                        }
123                    }
124                }
125            }
126            // Non-compress mode, full-match files, or non-compressible files: use raw size
127            if let Ok(metadata) = fs::metadata(path) {
128                stats.add_file_size_estimate(
129                    metadata.len(),
130                    path_str.len(),
131                    path.extension().and_then(|e| e.to_str()),
132                );
133            }
134        }
135        eprintln!("{}", stats.format_summary());
136    } else if config.dry_run {
137        for path in &files_to_process {
138            output.write_file_path(&path.display().to_string())?;
139        }
140        stats.add_output_bytes(output.bytes_written());
141        output.write_summary(&stats)?;
142    } else {
143        write_normal(config, &files_to_process, &mut output, &mut stats)?;
144    }
145
146    Ok(stats)
147}
148
149/// Write files with token budget allocation
150fn write_with_budget(
151    config: &Config,
152    files: &[PathBuf],
153    output: &mut OutputWriter,
154    stats: &mut Statistics,
155    budget: usize,
156    tokenizer: &Tokenizer,
157) -> Result<()> {
158    let base_path = &config.path;
159
160    // Read all file contents and compute scores
161    let mut candidates: Vec<FileCandidate> = Vec::new();
162    for path in files {
163        match fs::read_to_string(path) {
164            Ok(content) => {
165                let score = score_file(path, base_path);
166                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
167                let is_prose = is_prose_extension(ext);
168                candidates.push(FileCandidate {
169                    path: path.clone(),
170                    content,
171                    score,
172                    is_prose,
173                });
174            }
175            Err(e) => {
176                eprintln!("Error reading {}: {}", path.display(), e);
177            }
178        }
179    }
180
181    // Sort by (score DESC, path ASC) — stable sort
182    candidates.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
183
184    let mut remaining_budget = budget;
185
186    // Allocate full-match files first (if --tokens + --compress + --full-match)
187    let mut decisions: Vec<(&FileCandidate, FileDecision)> = Vec::new();
188
189    for candidate in &candidates {
190        let display_path = candidate.path.display().to_string();
191        let file_name = candidate
192            .path
193            .file_name()
194            .map(|f| f.to_string_lossy().to_string())
195            .unwrap_or_default();
196        let full_tokens = tokenizer.count_tokens(&candidate.content, candidate.is_prose);
197
198        if config.compress && config.is_full_match(&file_name) {
199            // Full-match files: always use full content, never compress
200            if full_tokens <= remaining_budget {
201                remaining_budget -= full_tokens;
202                stats.tokens_used += full_tokens;
203                decisions.push((
204                    candidate,
205                    FileDecision::IncludeFull(candidate.content.clone()),
206                ));
207            } else {
208                stats.excluded_by_budget.push(display_path);
209                decisions.push((candidate, FileDecision::Excluded));
210            }
211        } else if full_tokens <= remaining_budget {
212            // File fits in full
213            remaining_budget -= full_tokens;
214            stats.tokens_used += full_tokens;
215            if config.compress {
216                // Even though it fits, still compress if possible (per flag behavior)
217                let content = maybe_compress(config, &candidate.path, &candidate.content, stats);
218                decisions.push((candidate, content));
219            } else {
220                decisions.push((
221                    candidate,
222                    FileDecision::IncludeFull(candidate.content.clone()),
223                ));
224            }
225        } else if config.compress {
226            // Try compressed version
227            if let Some(lang) = language_for_path(&candidate.path) {
228                match compress_source(&candidate.content, lang) {
229                    CompressResult::Compressed(compressed) => {
230                        let compressed_tokens = tokenizer.count_tokens(&compressed, candidate.is_prose);
231                        if compressed_tokens <= remaining_budget {
232                            remaining_budget -= compressed_tokens;
233                            stats.tokens_used += compressed_tokens;
234                            stats.add_compressed();
235                            decisions
236                                .push((candidate, FileDecision::IncludeCompressed(compressed)));
237                        } else {
238                            stats.excluded_by_budget.push(display_path);
239                            decisions.push((candidate, FileDecision::Excluded));
240                        }
241                    }
242                    CompressResult::Fallback(original, reason) => {
243                        if let Some(reason) = &reason {
244                            eprintln!(
245                                "Warning: compression failed for {}: {}, including full content",
246                                display_path, reason
247                            );
248                        }
249                        // Fallback is full size, which we already know doesn't fit
250                        let fallback_tokens = tokenizer.count_tokens(&original, candidate.is_prose);
251                        if fallback_tokens <= remaining_budget {
252                            remaining_budget -= fallback_tokens;
253                            stats.tokens_used += fallback_tokens;
254                            decisions.push((candidate, FileDecision::IncludeFull(original)));
255                        } else {
256                            stats.excluded_by_budget.push(display_path);
257                            decisions.push((candidate, FileDecision::Excluded));
258                        }
259                    }
260                }
261            } else {
262                // Unsupported for compression, and full doesn't fit
263                stats.excluded_by_budget.push(display_path);
264                decisions.push((candidate, FileDecision::Excluded));
265            }
266        } else {
267            // No compression, doesn't fit
268            stats.excluded_by_budget.push(display_path);
269            decisions.push((candidate, FileDecision::Excluded));
270        }
271    }
272
273    // Write output
274    if config.stats_only {
275        for (candidate, decision) in &decisions {
276            match decision {
277                FileDecision::IncludeFull(content) | FileDecision::IncludeCompressed(content) => {
278                    let path_str = candidate.path.display().to_string();
279                    stats.add_file_size_estimate(
280                        content.len() as u64,
281                        path_str.len(),
282                        candidate.path.extension().and_then(|e| e.to_str()),
283                    );
284                }
285                FileDecision::Excluded => {}
286            }
287        }
288        eprintln!("{}", stats.format_summary());
289    } else if config.dry_run {
290        for (candidate, decision) in &decisions {
291            let display_path = candidate.path.display().to_string();
292            let annotation = match decision {
293                FileDecision::IncludeFull(_) => "[FULL]",
294                FileDecision::IncludeCompressed(_) => "[COMPRESSED]",
295                FileDecision::Excluded => "[EXCLUDED]",
296            };
297            output.write_file_path(&format!("{} {}", display_path, annotation))?;
298        }
299        stats.add_output_bytes(output.bytes_written());
300        output.write_summary(stats)?;
301    } else {
302        for (candidate, decision) in &decisions {
303            let display_path = candidate.path.display().to_string();
304            match decision {
305                FileDecision::IncludeFull(content) => {
306                    let mode = if config.compress { Some("full") } else { None };
307                    output.write_file_content_with_mode(&display_path, content, mode)?;
308                }
309                FileDecision::IncludeCompressed(content) => {
310                    output.write_file_content_with_mode(
311                        &display_path,
312                        content,
313                        Some("compressed"),
314                    )?;
315                }
316                FileDecision::Excluded => {}
317            }
318        }
319        stats.add_output_bytes(output.bytes_written());
320        output.write_summary(stats)?;
321    }
322
323    Ok(())
324}
325
326/// Write files without token budget (normal mode)
327fn write_normal(
328    config: &Config,
329    files: &[PathBuf],
330    output: &mut OutputWriter,
331    stats: &mut Statistics,
332) -> Result<()> {
333    for path in files {
334        match fs::read_to_string(path) {
335            Ok(content) => {
336                let display_path = path.display().to_string();
337
338                if config.compress {
339                    let file_name = path
340                        .file_name()
341                        .map(|f| f.to_string_lossy().to_string())
342                        .unwrap_or_default();
343                    let is_full = config.is_full_match(&file_name);
344
345                    if is_full {
346                        output.write_file_content_with_mode(
347                            &display_path,
348                            &content,
349                            Some("full"),
350                        )?;
351                    } else if let Some(lang) = language_for_path(path) {
352                        match compress_source(&content, lang) {
353                            CompressResult::Compressed(compressed) => {
354                                output.write_file_content_with_mode(
355                                    &display_path,
356                                    &compressed,
357                                    Some("compressed"),
358                                )?;
359                                stats.add_compressed();
360                            }
361                            CompressResult::Fallback(original, reason) => {
362                                if let Some(reason) = reason {
363                                    eprintln!(
364                                        "Warning: compression failed for {}: {}, including full content",
365                                        display_path, reason
366                                    );
367                                }
368                                output.write_file_content_with_mode(
369                                    &display_path,
370                                    &original,
371                                    Some("full"),
372                                )?;
373                            }
374                        }
375                    } else {
376                        output.write_file_content_with_mode(
377                            &display_path,
378                            &content,
379                            Some("full"),
380                        )?;
381                    }
382                } else {
383                    output.write_file_content(&display_path, &content)?;
384                }
385            }
386            Err(e) => {
387                eprintln!("Error reading {}: {}", path.display(), e);
388            }
389        }
390    }
391
392    stats.add_output_bytes(output.bytes_written());
393    output.write_summary(stats)?;
394    Ok(())
395}
396
397/// Helper: Try to compress a file if applicable, returning the appropriate decision
398fn maybe_compress(
399    config: &Config,
400    path: &Path,
401    content: &str,
402    stats: &mut Statistics,
403) -> FileDecision {
404    let file_name = path
405        .file_name()
406        .map(|f| f.to_string_lossy().to_string())
407        .unwrap_or_default();
408
409    if config.is_full_match(&file_name) {
410        return FileDecision::IncludeFull(content.to_string());
411    }
412
413    if let Some(lang) = language_for_path(path) {
414        match compress_source(content, lang) {
415            CompressResult::Compressed(compressed) => {
416                stats.add_compressed();
417                FileDecision::IncludeCompressed(compressed)
418            }
419            CompressResult::Fallback(original, reason) => {
420                if let Some(reason) = reason {
421                    eprintln!(
422                        "Warning: compression failed for {}: {}, including full content",
423                        path.display(),
424                        reason
425                    );
426                }
427                FileDecision::IncludeFull(original)
428            }
429        }
430    } else {
431        FileDecision::IncludeFull(content.to_string())
432    }
433}
434
435/// Check if a file should be skipped, returning the reason if so
436fn should_skip(path: &Path, config: &Config) -> Option<SkipReason> {
437    if let Some(file_name) = path.file_name() {
438        if !config.should_include_by_match(&file_name.to_string_lossy()) {
439            return Some(SkipReason::Match);
440        }
441    }
442
443    if is_secret_file(path) {
444        return Some(SkipReason::Secret);
445    }
446
447    if let Some(ext) = path.extension() {
448        let ext_str = ext.to_string_lossy();
449        if !config.should_include_extension(&ext_str) {
450            return Some(SkipReason::Extension);
451        }
452
453        if is_binary_extension(path) {
454            return Some(SkipReason::Binary);
455        }
456    }
457
458    if exceeds_size_limit(path, config.max_file_size) {
459        return Some(SkipReason::TooLarge);
460    }
461
462    if is_binary_content(path) {
463        return Some(SkipReason::Binary);
464    }
465
466    None
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `should_skip` returns `expected` for every file name in `names`.
    fn assert_decision(config: &Config, names: &[&str], expected: Option<SkipReason>) {
        for &name in names {
            assert_eq!(
                should_skip(Path::new(name), config),
                expected,
                "unexpected skip decision for {}",
                name
            );
        }
    }

    #[test]
    fn test_should_skip_secret() {
        let config = Config::default();
        assert_decision(
            &config,
            &[".env", "credentials.json"],
            Some(SkipReason::Secret),
        );
    }

    #[test]
    fn test_should_skip_binary_extension() {
        let config = Config::default();
        assert_decision(
            &config,
            &["image.png", "binary.exe"],
            Some(SkipReason::Binary),
        );
    }

    #[test]
    fn test_should_skip_extension_filter() {
        // Only `.rs` files are allowed through the extension filter.
        let config = Config {
            include_extensions: Some(vec!["rs".to_string()]),
            ..Default::default()
        };

        assert_decision(&config, &["file.json"], Some(SkipReason::Extension));
        assert_decision(&config, &["file.rs"], None);
    }

    #[test]
    fn test_should_skip_match_filter() {
        // Only file names matching the glob are kept.
        let matcher = globset::Glob::new("*_test.go").unwrap().compile_matcher();
        let config = Config {
            match_patterns: Some(vec![matcher]),
            ..Default::default()
        };

        assert_decision(&config, &["main.go"], Some(SkipReason::Match));
        assert_decision(&config, &["user_test.go"], None);
    }
}
528}