1use clap::{CommandFactory, Parser};
2
3use std::fs;
4use std::io::{self, Write};
5use std::path::{Path, PathBuf};
6use std::time::Instant;
7
8pub mod cache;
9pub mod cli;
10pub mod config;
11pub mod config_resolver;
12pub mod diff;
13pub mod file_utils;
14pub mod markdown;
15pub mod state;
16pub mod token_count;
17pub mod tree;
18pub mod tree_sitter;
19
20use std::fs::File;
21
22use cache::CacheManager;
23use cli::Args;
24use config::{Config, load_config_from_path};
25use diff::render_per_file_diffs;
26use file_utils::{collect_files, confirm_overwrite, confirm_processing};
27use markdown::generate_markdown;
28use state::{ProjectState, StateComparison};
29use token_count::{count_file_tokens, count_tree_tokens, estimate_tokens};
30use tree::{build_file_tree, print_tree};
31
/// Options that control how diff output is produced.
#[derive(Debug, Clone)]
pub struct DiffConfig {
    /// Number of context lines requested for diffs.
    pub context_lines: usize,
    /// Whether diff generation is switched on.
    pub enabled: bool,
    /// When set, only change information is emitted and full file bodies are omitted.
    pub diff_only: bool,
}

impl Default for DiffConfig {
    /// Diffing is off by default; three context lines are used once it is enabled.
    fn default() -> Self {
        DiffConfig {
            diff_only: false,
            enabled: false,
            context_lines: 3,
        }
    }
}
49
/// Abstraction over the interactive confirmation prompts, so callers (and tests) can
/// substitute canned answers for real user interaction.
pub trait Prompter {
    /// Asks whether processing `file_count` files should go ahead.
    ///
    /// `Ok(true)` means proceed, `Ok(false)` means the user declined.
    fn confirm_processing(&self, file_count: usize) -> io::Result<bool>;

    /// Asks whether the existing file at `file_path` may be overwritten.
    ///
    /// `Ok(true)` means overwrite, `Ok(false)` means the user declined.
    fn confirm_overwrite(&self, file_path: &str) -> io::Result<bool>;
}
54
/// [`Prompter`] implementation that forwards to the confirmation helpers in `file_utils`.
pub struct DefaultPrompter;

impl Prompter for DefaultPrompter {
    /// Delegates to `file_utils::confirm_processing`.
    fn confirm_processing(&self, file_count: usize) -> io::Result<bool> {
        confirm_processing(file_count)
    }

    /// Delegates to `file_utils::confirm_overwrite`.
    fn confirm_overwrite(&self, file_path: &str) -> io::Result<bool> {
        confirm_overwrite(file_path)
    }
}
65
66pub fn run_with_args(args: Args, config: Config, prompter: &impl Prompter) -> io::Result<()> {
67 let start_time = Instant::now();
68
69 let silent = std::env::var("CB_SILENT")
70 .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
71 .unwrap_or(false);
72
73 let final_args = args;
75 let mut resolved_base = PathBuf::from(&final_args.input);
78 let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
79 if resolved_base == Path::new(".")
80 && !cwd.join("context-builder.toml").exists()
81 && let Some(output_parent) = Path::new(&final_args.output).parent()
82 && output_parent
83 .file_name()
84 .map(|n| n == "output")
85 .unwrap_or(false)
86 && let Some(project_root) = output_parent.parent()
87 && project_root.join("context-builder.toml").exists()
88 {
89 resolved_base = project_root.to_path_buf();
90 }
91 let base_path = resolved_base.as_path();
92
93 if !base_path.exists() || !base_path.is_dir() {
94 if !silent {
95 eprintln!(
96 "Error: The specified input directory '{}' does not exist or is not a directory.",
97 final_args.input
98 );
99 }
100 return Err(io::Error::new(
101 io::ErrorKind::NotFound,
102 format!(
103 "Input directory '{}' does not exist or is not a directory",
104 final_args.input
105 ),
106 ));
107 }
108
109 let diff_config = if config.auto_diff.unwrap_or(false) {
111 Some(DiffConfig {
112 context_lines: config.diff_context_lines.unwrap_or(3),
113 enabled: true,
114 diff_only: final_args.diff_only,
115 })
116 } else {
117 None
118 };
119
120 if !final_args.preview
121 && !final_args.token_count
122 && Path::new(&final_args.output).exists()
123 && !final_args.yes
124 && !prompter.confirm_overwrite(&final_args.output)?
125 {
126 if !silent {
127 println!("Operation cancelled.");
128 }
129 return Err(io::Error::new(
130 io::ErrorKind::Interrupted,
131 "Operation cancelled by user",
132 ));
133 }
134
135 let mut auto_ignores: Vec<String> = vec![".context-builder".to_string()];
137
138 let output_path = Path::new(&final_args.output);
140 if let Ok(rel_output) = output_path.strip_prefix(base_path) {
141 if config.timestamped_output == Some(true) {
143 if let (Some(parent), Some(stem), Some(ext)) = (
145 rel_output.parent(),
146 output_path.file_stem().and_then(|s| s.to_str()),
147 output_path.extension().and_then(|s| s.to_str()),
148 ) {
149 let base_stem = if let Some(ref cfg_output) = config.output {
153 Path::new(cfg_output)
154 .file_stem()
155 .and_then(|s| s.to_str())
156 .unwrap_or(stem)
157 .to_string()
158 } else {
159 stem.to_string()
160 };
161 let glob = if parent == Path::new("") {
162 format!("{}_*.{}", base_stem, ext)
163 } else {
164 format!("{}/{}_*.{}", parent.display(), base_stem, ext)
165 };
166 auto_ignores.push(glob);
167 }
168 } else {
169 auto_ignores.push(rel_output.to_string_lossy().to_string());
171 }
172 } else {
173 let output_str = final_args.output.clone();
175 if config.timestamped_output == Some(true) {
176 if let (Some(stem), Some(ext)) = (
177 output_path.file_stem().and_then(|s| s.to_str()),
178 output_path.extension().and_then(|s| s.to_str()),
179 ) {
180 let base_stem = if let Some(ref cfg_output) = config.output {
181 Path::new(cfg_output)
182 .file_stem()
183 .and_then(|s| s.to_str())
184 .unwrap_or(stem)
185 .to_string()
186 } else {
187 stem.to_string()
188 };
189 if let Some(parent) = output_path.parent() {
190 let parent_str = parent.to_string_lossy();
191 if parent_str.is_empty() || parent_str == "." {
192 auto_ignores.push(format!("{}_*.{}", base_stem, ext));
193 } else {
194 auto_ignores.push(format!("{}/{}_*.{}", parent_str, base_stem, ext));
195 }
196 }
197 }
198 } else {
199 auto_ignores.push(output_str);
200 }
201 }
202
203 if let Some(ref output_folder) = config.output_folder {
205 auto_ignores.push(output_folder.clone());
206 }
207
208 let files = collect_files(
209 base_path,
210 &final_args.filter,
211 &final_args.ignore,
212 &auto_ignores,
213 )?;
214 let debug_config = std::env::var("CB_DEBUG_CONFIG").is_ok();
215 if debug_config {
216 eprintln!("[DEBUG][CONFIG] Args: {:?}", final_args);
217 eprintln!("[DEBUG][CONFIG] Raw Config: {:?}", config);
218 eprintln!("[DEBUG][CONFIG] Auto-ignores: {:?}", auto_ignores);
219 eprintln!("[DEBUG][CONFIG] Collected {} files", files.len());
220 for f in &files {
221 eprintln!("[DEBUG][CONFIG] - {}", f.path().display());
222 }
223 }
224
225 if !silent {
227 const LARGE_FILE_THRESHOLD: u64 = 100 * 1024; let mut large_files: Vec<(String, u64)> = Vec::new();
229 let mut total_size: u64 = 0;
230
231 for entry in &files {
232 if let Ok(metadata) = entry.path().metadata() {
233 let size = metadata.len();
234 total_size += size;
235 if size > LARGE_FILE_THRESHOLD {
236 let rel_path = entry
237 .path()
238 .strip_prefix(base_path)
239 .unwrap_or(entry.path())
240 .to_string_lossy()
241 .to_string();
242 large_files.push((rel_path, size));
243 }
244 }
245 }
246
247 if !large_files.is_empty() {
248 large_files.sort_by(|a, b| b.1.cmp(&a.1)); eprintln!(
250 "\nā {} large file(s) detected (>{} KB):",
251 large_files.len(),
252 LARGE_FILE_THRESHOLD / 1024
253 );
254 for (path, size) in large_files.iter().take(5) {
255 eprintln!(" {:>8} KB {}", size / 1024, path);
256 }
257 if large_files.len() > 5 {
258 eprintln!(" ... and {} more", large_files.len() - 5);
259 }
260 eprintln!(
261 " Total context size: {} KB across {} files\n",
262 total_size / 1024,
263 files.len()
264 );
265 }
266 }
267 let file_tree = build_file_tree(&files, base_path);
268
269 if final_args.preview {
270 if !silent {
271 println!("\n# File Tree Structure (Preview)\n");
272 print_tree(&file_tree, 0);
273 }
274 if !final_args.token_count {
275 return Ok(());
276 }
277 }
278
279 if final_args.token_count {
280 if !silent {
281 println!("\n# Token Count Estimation\n");
282 let mut total_tokens = 0;
283 total_tokens += estimate_tokens("# Directory Structure Report\n\n");
284 if !final_args.filter.is_empty() {
285 total_tokens += estimate_tokens(&format!(
286 "This document contains files from the `{}` directory with extensions: {} \n",
287 final_args.input,
288 final_args.filter.join(", ")
289 ));
290 } else {
291 total_tokens += estimate_tokens(&format!(
292 "This document contains all files from the `{}` directory, optimized for LLM consumption.\n",
293 final_args.input
294 ));
295 }
296 if !final_args.ignore.is_empty() {
297 total_tokens += estimate_tokens(&format!(
298 "Custom ignored patterns: {} \n",
299 final_args.ignore.join(", ")
300 ));
301 }
302 total_tokens += estimate_tokens("Content hash: 0000000000000000\n\n");
303 total_tokens += estimate_tokens("## File Tree Structure\n\n");
304 let tree_tokens = count_tree_tokens(&file_tree, 0);
305 total_tokens += tree_tokens;
306 let file_tokens: usize = files
307 .iter()
308 .map(|entry| count_file_tokens(base_path, entry, final_args.line_numbers))
309 .sum();
310 total_tokens += file_tokens;
311 println!("Estimated total tokens: {}", total_tokens);
312 println!("File tree tokens: {}", tree_tokens);
313 println!("File content tokens: {}", file_tokens);
314 }
315 return Ok(());
316 }
317
318 if !final_args.yes && !prompter.confirm_processing(files.len())? {
319 if !silent {
320 println!("Operation cancelled.");
321 }
322 return Err(io::Error::new(
323 io::ErrorKind::Interrupted,
324 "Operation cancelled by user",
325 ));
326 }
327
328 if config.auto_diff.unwrap_or(false) {
333 let mut effective_config = config.clone();
338 if !final_args.filter.is_empty() {
340 effective_config.filter = Some(final_args.filter.clone());
341 }
342 if !final_args.ignore.is_empty() {
343 effective_config.ignore = Some(final_args.ignore.clone());
344 }
345 effective_config.line_numbers = Some(final_args.line_numbers);
346
347 let current_state = ProjectState::from_files(
349 &files,
350 base_path,
351 &effective_config,
352 final_args.line_numbers,
353 )?;
354
355 let cache_manager = CacheManager::new(base_path, &effective_config);
357 let previous_state = match cache_manager.read_cache() {
358 Ok(state) => state,
359 Err(e) => {
360 if !silent {
361 eprintln!(
362 "Warning: Failed to read cache (proceeding without diff): {}",
363 e
364 );
365 }
366 None
367 }
368 };
369
370 let diff_cfg = diff_config.as_ref().unwrap();
371
372 let effective_previous = if let Some(prev) = previous_state.as_ref() {
374 if prev.config_hash != current_state.config_hash {
375 None
377 } else {
378 Some(prev)
379 }
380 } else {
381 None
382 };
383
384 let comparison = effective_previous.map(|prev| current_state.compare_with(prev));
386
387 let debug_autodiff = std::env::var("CB_DEBUG_AUTODIFF").is_ok();
388 if debug_autodiff {
389 eprintln!(
390 "[DEBUG][AUTODIFF] cache file: {}",
391 cache_manager.debug_cache_file_path().display()
392 );
393 eprintln!(
394 "[DEBUG][AUTODIFF] config_hash current={} prev={:?} invalidated={}",
395 current_state.config_hash,
396 previous_state.as_ref().map(|s| s.config_hash.clone()),
397 effective_previous.is_none() && previous_state.is_some()
398 );
399 eprintln!("[DEBUG][AUTODIFF] effective_config: {:?}", effective_config);
400 if let Some(prev) = previous_state.as_ref() {
401 eprintln!("[DEBUG][AUTODIFF] raw previous files: {}", prev.files.len());
402 }
403 if let Some(prev) = effective_previous {
404 eprintln!(
405 "[DEBUG][AUTODIFF] effective previous files: {}",
406 prev.files.len()
407 );
408 for k in prev.files.keys() {
409 eprintln!(" PREV: {}", k.display());
410 }
411 }
412 eprintln!(
413 "[DEBUG][AUTODIFF] current files: {}",
414 current_state.files.len()
415 );
416 for k in current_state.files.keys() {
417 eprintln!(" CURR: {}", k.display());
418 }
419 }
420
421 let cwd = std::env::current_dir().unwrap_or_else(|_| base_path.to_path_buf());
426 let sorted_paths: Vec<PathBuf> = files
427 .iter()
428 .map(|entry| {
429 entry
430 .path()
431 .strip_prefix(base_path)
432 .or_else(|_| entry.path().strip_prefix(&cwd))
433 .map(|p| p.to_path_buf())
434 .unwrap_or_else(|_| {
435 entry
436 .path()
437 .file_name()
438 .map(PathBuf::from)
439 .unwrap_or_else(|| entry.path().to_path_buf())
440 })
441 })
442 .collect();
443
444 let mut final_doc = generate_markdown_with_diff(
446 ¤t_state,
447 comparison.as_ref(),
448 &final_args,
449 &file_tree,
450 diff_cfg,
451 &sorted_paths,
452 )?;
453
454 if let Some(max_tokens) = final_args.max_tokens {
456 let max_bytes = max_tokens.saturating_mul(4);
457 if final_doc.len() > max_bytes {
458 let mut truncate_at = max_bytes;
460 while truncate_at > 0 && !final_doc.is_char_boundary(truncate_at) {
461 truncate_at -= 1;
462 }
463 final_doc.truncate(truncate_at);
464
465 let fence_count = final_doc.matches("\n```").count()
469 + if final_doc.starts_with("```") { 1 } else { 0 };
470 if fence_count % 2 != 0 {
471 final_doc.push_str("\n```\n");
472 }
473
474 final_doc.push_str("\n---\n\n");
475 final_doc.push_str(&format!(
476 "_Output truncated: exceeded {} token budget (estimated)._\n",
477 max_tokens
478 ));
479 }
480 }
481
482 let output_path = Path::new(&final_args.output);
484 if let Some(parent) = output_path.parent()
485 && !parent.exists()
486 && let Err(e) = fs::create_dir_all(parent)
487 {
488 return Err(io::Error::other(format!(
489 "Failed to create output directory {}: {}",
490 parent.display(),
491 e
492 )));
493 }
494 let mut final_output = fs::File::create(output_path)?;
495 final_output.write_all(final_doc.as_bytes())?;
496
497 if let Err(e) = cache_manager.write_cache(¤t_state)
499 && !silent
500 {
501 eprintln!("Warning: failed to update state cache: {}", e);
502 }
503
504 let duration = start_time.elapsed();
505 if !silent {
506 if let Some(comp) = &comparison {
507 if comp.summary.has_changes() {
508 println!(
509 "Documentation created successfully with {} changes: {}",
510 comp.summary.total_changes, final_args.output
511 );
512 } else {
513 println!(
514 "Documentation created successfully (no changes detected): {}",
515 final_args.output
516 );
517 }
518 } else {
519 println!(
520 "Documentation created successfully (initial state): {}",
521 final_args.output
522 );
523 }
524 println!("Processing time: {:.2?}", duration);
525
526 let output_bytes = final_doc.len();
528 print_context_window_warning(output_bytes, final_args.max_tokens);
529 }
530 return Ok(());
531 }
532
533 let ts_config = markdown::TreeSitterConfig {
536 signatures: final_args.signatures,
537 structure: final_args.structure,
538 truncate: final_args.truncate.clone(),
539 visibility: final_args.visibility.clone(),
540 };
541
542 if !silent && (ts_config.signatures || ts_config.structure || ts_config.truncate == "smart") {
544 #[cfg(not(feature = "tree-sitter-base"))]
545 {
546 eprintln!("ā ļø --signatures/--structure/--truncate smart require tree-sitter support.");
547 eprintln!(" Build with: cargo build --features tree-sitter-all");
548 eprintln!(" Falling back to standard output.\n");
549 }
550 }
551
552 generate_markdown(
553 &final_args.output,
554 &final_args.input,
555 &final_args.filter,
556 &final_args.ignore,
557 &file_tree,
558 &files,
559 base_path,
560 final_args.line_numbers,
561 config.encoding_strategy.as_deref(),
562 final_args.max_tokens,
563 &ts_config,
564 )?;
565
566 let duration = start_time.elapsed();
567 if !silent {
568 println!("Documentation created successfully: {}", final_args.output);
569 println!("Processing time: {:.2?}", duration);
570
571 let output_bytes = fs::metadata(&final_args.output)
573 .map(|m| m.len() as usize)
574 .unwrap_or(0);
575 print_context_window_warning(output_bytes, final_args.max_tokens);
576 }
577
578 Ok(())
579}
580
/// Prints the estimated token count of the output and, when it is too large, a warning.
///
/// Tokens are estimated as `output_bytes / 4`. The estimate is always printed to stdout.
/// The warning goes to stderr only when no `max_tokens` budget was given and the estimate
/// exceeds the recommended limit.
fn print_context_window_warning(output_bytes: usize, max_tokens: Option<usize>) {
    const RECOMMENDED_LIMIT: usize = 128_000;

    let estimated_tokens = output_bytes / 4;

    println!("Estimated tokens: ~{}K", estimated_tokens / 1000);

    // With an explicit budget the output was already capped, so no warning is needed.
    if max_tokens.is_some() || estimated_tokens <= RECOMMENDED_LIMIT {
        return;
    }

    eprintln!();
    eprintln!(
        "⚠️ Output is ~{}K tokens — recommended limit is {}K for effective LLM context.",
        estimated_tokens / 1000,
        RECOMMENDED_LIMIT / 1000
    );
    eprintln!(" Large contexts degrade response quality. Consider narrowing the scope:");
    eprintln!();
    eprintln!(" • --max-tokens 100000 Cap output to a token budget");
    eprintln!(" • --filter rs,toml Include only specific file types");
    eprintln!(" • --ignore docs,assets Exclude directories by name");
    eprintln!(" • --token-count Preview size without generating");
    eprintln!();
}
614
615fn generate_markdown_with_diff(
617 current_state: &ProjectState,
618 comparison: Option<&StateComparison>,
619 args: &Args,
620 file_tree: &tree::FileTree,
621 diff_config: &DiffConfig,
622 sorted_paths: &[PathBuf],
623) -> io::Result<String> {
624 let mut output = String::new();
625
626 output.push_str("# Directory Structure Report\n\n");
628
629 output.push_str(&format!(
631 "**Project:** {}\n",
632 current_state.metadata.project_name
633 ));
634 output.push_str(&format!("**Generated:** {}\n", current_state.timestamp));
635
636 if !args.filter.is_empty() {
637 output.push_str(&format!("**Filters:** {}\n", args.filter.join(", ")));
638 }
639
640 if !args.ignore.is_empty() {
641 output.push_str(&format!("**Ignored:** {}\n", args.ignore.join(", ")));
642 }
643
644 output.push('\n');
645
646 if let Some(comp) = comparison {
648 if comp.summary.has_changes() {
649 output.push_str(&comp.summary.to_markdown());
650
651 let added_files: Vec<_> = comp
653 .file_diffs
654 .iter()
655 .filter(|d| matches!(d.status, diff::PerFileStatus::Added))
656 .collect();
657
658 if diff_config.diff_only && !added_files.is_empty() {
659 output.push_str("## Added Files\n\n");
660 for added in added_files {
661 output.push_str(&format!("### File: `{}`\n\n", added.path));
662 output.push_str("_Status: Added_\n\n");
663 let mut lines: Vec<String> = Vec::new();
665 for line in added.diff.lines() {
666 if let Some(rest) = line.strip_prefix("+ ") {
669 lines.push(rest.to_string());
670 } else if let Some(rest) = line.strip_prefix('+') {
671 lines.push(rest.to_string());
673 }
674 }
675 output.push_str("```text\n");
676 if args.line_numbers {
677 for (idx, l) in lines.iter().enumerate() {
678 output.push_str(&format!("{:>4} | {}\n", idx + 1, l));
679 }
680 } else {
681 for l in lines {
682 output.push_str(&l);
683 output.push('\n');
684 }
685 }
686 output.push_str("```\n\n");
687 }
688 }
689
690 let changed_diffs: Vec<diff::PerFileDiff> = comp
692 .file_diffs
693 .iter()
694 .filter(|d| d.is_changed())
695 .cloned()
696 .collect();
697 if !changed_diffs.is_empty() {
698 output.push_str("## File Differences\n\n");
699 let diff_markdown = render_per_file_diffs(&changed_diffs);
700 output.push_str(&diff_markdown);
701 }
702 } else {
703 output.push_str("## No Changes Detected\n\n");
704 }
705 }
706
707 output.push_str("## File Tree Structure\n\n");
709 let mut tree_output = Vec::new();
710 tree::write_tree_to_file(&mut tree_output, file_tree, 0)?;
711 output.push_str(&String::from_utf8_lossy(&tree_output));
712 output.push('\n');
713
714 if !diff_config.diff_only {
716 output.push_str("## File Contents\n\n");
717
718 for path in sorted_paths {
721 if let Some(file_state) = current_state.files.get(path) {
722 output.push_str(&format!("### File: `{}`\n\n", path.display()));
723 output.push_str(&format!("- Size: {} bytes\n", file_state.size));
724 output.push_str(&format!("- Modified: {:?}\n\n", file_state.modified));
725
726 let extension = path.extension().and_then(|s| s.to_str()).unwrap_or("text");
728 let language = match extension {
729 "rs" => "rust",
730 "js" => "javascript",
731 "ts" => "typescript",
732 "py" => "python",
733 "json" => "json",
734 "toml" => "toml",
735 "md" => "markdown",
736 "yaml" | "yml" => "yaml",
737 "html" => "html",
738 "css" => "css",
739 _ => extension,
740 };
741
742 output.push_str(&format!("```{}\n", language));
743
744 if args.line_numbers {
745 for (i, line) in file_state.content.lines().enumerate() {
746 output.push_str(&format!("{:>4} | {}\n", i + 1, line));
747 }
748 } else {
749 output.push_str(&file_state.content);
750 if !file_state.content.ends_with('\n') {
751 output.push('\n');
752 }
753 }
754
755 output.push_str("```\n\n");
756 }
757 }
758 }
759
760 Ok(output)
761}
762
763pub fn run() -> io::Result<()> {
764 env_logger::init();
765 let args = Args::parse();
766
767 if args.init {
769 return init_config();
770 }
771
772 let project_root = Path::new(&args.input);
774 let config = load_config_from_path(project_root);
775
776 if args.clear_cache {
778 let cache_path = project_root.join(".context-builder").join("cache");
779 if cache_path.exists() {
780 match fs::remove_dir_all(&cache_path) {
781 Ok(()) => println!("Cache cleared: {}", cache_path.display()),
782 Err(e) => eprintln!("Failed to clear cache ({}): {}", cache_path.display(), e),
783 }
784 } else {
785 println!("No cache directory found at {}", cache_path.display());
786 }
787 return Ok(());
788 }
789
790 if std::env::args().len() == 1 && config.is_none() {
791 Args::command().print_help()?;
792 return Ok(());
793 }
794
795 let resolution = crate::config_resolver::resolve_final_config(args, config.clone());
797
798 let silent = std::env::var("CB_SILENT")
800 .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
801 .unwrap_or(false);
802
803 if !silent {
804 for warning in &resolution.warnings {
805 eprintln!("Warning: {}", warning);
806 }
807 }
808
809 let final_args = Args {
811 input: resolution.config.input,
812 output: resolution.config.output,
813 filter: resolution.config.filter,
814 ignore: resolution.config.ignore,
815 line_numbers: resolution.config.line_numbers,
816 preview: resolution.config.preview,
817 token_count: resolution.config.token_count,
818 yes: resolution.config.yes,
819 diff_only: resolution.config.diff_only,
820 clear_cache: resolution.config.clear_cache,
821 max_tokens: resolution.config.max_tokens,
822 init: false,
823 signatures: resolution.config.signatures,
824 structure: resolution.config.structure,
825 truncate: resolution.config.truncate,
826 visibility: resolution.config.visibility,
827 };
828
829 let final_config = Config {
831 auto_diff: Some(resolution.config.auto_diff),
832 diff_context_lines: Some(resolution.config.diff_context_lines),
833 ..config.unwrap_or_default()
834 };
835
836 run_with_args(final_args, final_config, &DefaultPrompter)
837}
838
839fn detect_major_file_types() -> io::Result<Vec<String>> {
841 use std::collections::HashMap;
842 let mut extension_counts = HashMap::new();
843
844 let default_ignores = vec![
846 "docs".to_string(),
847 "target".to_string(),
848 ".git".to_string(),
849 "node_modules".to_string(),
850 ];
851
852 let files = crate::file_utils::collect_files(Path::new("."), &[], &default_ignores, &[])?;
854
855 for entry in files {
857 let path = entry.path();
858 if let Some(extension) = path.extension().and_then(|ext| ext.to_str()) {
859 *extension_counts.entry(extension.to_string()).or_insert(0) += 1;
861 }
862 }
863
864 let mut extensions: Vec<(String, usize)> = extension_counts.into_iter().collect();
866 extensions.sort_by(|a, b| b.1.cmp(&a.1));
867
868 let top_extensions: Vec<String> = extensions.into_iter().take(5).map(|(ext, _)| ext).collect();
870
871 Ok(top_extensions)
872}
873
874fn init_config() -> io::Result<()> {
876 let config_path = Path::new("context-builder.toml");
877
878 if config_path.exists() {
879 println!("Config file already exists at {}", config_path.display());
880 println!("If you want to replace it, please remove it manually first.");
881 return Ok(());
882 }
883
884 let filter_suggestions = match detect_major_file_types() {
886 Ok(extensions) => extensions,
887 _ => vec!["rs".to_string(), "toml".to_string()], };
889
890 let filter_string = if filter_suggestions.is_empty() {
891 r#"["rs", "toml"]"#.to_string()
892 } else {
893 format!(r#"["{}"]"#, filter_suggestions.join(r#"", ""#))
894 };
895
896 let default_config_content = format!(
897 r#"# Context Builder Configuration File
898# This file was generated with sensible defaults based on the file types detected in your project
899
900# Output file name (or base name when timestamped_output is true)
901output = "context.md"
902
903# Optional folder to place the generated output file(s) in
904output_folder = "docs"
905
906# Append a UTC timestamp to the output file name (before extension)
907timestamped_output = true
908
909# Enable automatic diff generation (requires timestamped_output = true)
910auto_diff = true
911
912# Emit only change summary + modified file diffs (no full file bodies)
913diff_only = false
914
915# File extensions to include (no leading dot, e.g. "rs", "toml")
916filter = {}
917
918# File / directory names to ignore (exact name matches)
919ignore = ["docs", "target", ".git", "node_modules"]
920
921# Add line numbers to code blocks
922line_numbers = false
923"#,
924 filter_string
925 );
926
927 let mut file = File::create(config_path)?;
928 file.write_all(default_config_content.as_bytes())?;
929
930 println!("Config file created at {}", config_path.display());
931 println!("Detected file types: {}", filter_suggestions.join(", "));
932 println!("You can now customize it according to your project needs.");
933
934 Ok(())
935}
936
937#[cfg(test)]
938mod tests {
939 use super::*;
940 use std::io::Result;
941 use tempfile::tempdir;
942
    /// Test double for [`Prompter`] that answers every prompt with a preset value.
    struct MockPrompter {
        // Answer returned by `confirm_processing`.
        confirm_processing_response: bool,
        // Answer returned by `confirm_overwrite`.
        confirm_overwrite_response: bool,
    }

    impl MockPrompter {
        /// Creates a mock that answers processing prompts with `processing` and overwrite
        /// prompts with `overwrite`.
        fn new(processing: bool, overwrite: bool) -> Self {
            Self {
                confirm_processing_response: processing,
                confirm_overwrite_response: overwrite,
            }
        }
    }

    impl Prompter for MockPrompter {
        // Returns the preset answer regardless of the file count.
        fn confirm_processing(&self, _file_count: usize) -> Result<bool> {
            Ok(self.confirm_processing_response)
        }

        // Returns the preset answer regardless of the path.
        fn confirm_overwrite(&self, _file_path: &str) -> Result<bool> {
            Ok(self.confirm_overwrite_response)
        }
    }
967
968 #[test]
969 fn test_diff_config_default() {
970 let config = DiffConfig::default();
971 assert_eq!(config.context_lines, 3);
972 assert!(!config.enabled);
973 assert!(!config.diff_only);
974 }
975
976 #[test]
977 fn test_diff_config_custom() {
978 let config = DiffConfig {
979 context_lines: 5,
980 enabled: true,
981 diff_only: true,
982 };
983 assert_eq!(config.context_lines, 5);
984 assert!(config.enabled);
985 assert!(config.diff_only);
986 }
987
    /// `DefaultPrompter` confirms processing for a 50-file run.
    #[test]
    fn test_default_prompter() {
        let prompter = DefaultPrompter;

        // NOTE(review): presumably 50 files is below the count at which the real helper
        // prompts on stdin — confirm against `file_utils::confirm_processing`.
        let result = prompter.confirm_processing(50);
        assert!(result.is_ok());
        assert!(result.unwrap());
    }
997
    /// A missing input directory is rejected with a "does not exist" error.
    #[test]
    fn test_run_with_args_nonexistent_directory() {
        let args = Args {
            input: "/nonexistent/directory".to_string(),
            output: "output.md".to_string(),
            filter: vec![],
            ignore: vec![],
            line_numbers: false,
            preview: false,
            token_count: false,
            yes: false,
            diff_only: false,
            clear_cache: false,
            init: false,
            max_tokens: None,
            signatures: false,
            structure: false,
            truncate: "smart".to_string(),
            visibility: "all".to_string(),
        };
        let config = Config::default();
        let prompter = MockPrompter::new(true, true);

        // The directory check runs before any prompt, so the mock answers are irrelevant.
        let result = run_with_args(args, config, &prompter);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("does not exist"));
    }
1025
1026 #[test]
1027 fn test_run_with_args_preview_mode() {
1028 let temp_dir = tempdir().unwrap();
1029 let base_path = temp_dir.path();
1030
1031 fs::write(base_path.join("test.rs"), "fn main() {}").unwrap();
1033 fs::create_dir(base_path.join("src")).unwrap();
1034 fs::write(base_path.join("src/lib.rs"), "pub fn hello() {}").unwrap();
1035
1036 let args = Args {
1037 input: ".".to_string(),
1038 output: "test.md".to_string(),
1039 filter: vec![],
1040 ignore: vec![],
1041 line_numbers: false,
1042 preview: false,
1043 token_count: false,
1044 yes: false,
1045 diff_only: false,
1046 clear_cache: false,
1047 init: false,
1048 max_tokens: None,
1049 signatures: false,
1050 structure: false,
1051 truncate: "smart".to_string(),
1052 visibility: "all".to_string(),
1053 };
1054 let config = Config::default();
1055 let prompter = MockPrompter::new(true, true);
1056
1057 unsafe {
1059 std::env::set_var("CB_SILENT", "1");
1060 }
1061 let result = run_with_args(args, config, &prompter);
1062 unsafe {
1063 std::env::remove_var("CB_SILENT");
1064 }
1065
1066 assert!(result.is_ok());
1067 }
1068
    /// Token-count mode succeeds on a directory containing a single small file.
    #[test]
    fn test_run_with_args_token_count_mode() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        fs::write(base_path.join("small.txt"), "Hello world").unwrap();

        let args = Args {
            input: base_path.to_string_lossy().to_string(),
            output: "test.md".to_string(),
            filter: vec![],
            ignore: vec![],
            line_numbers: false,
            preview: false,
            token_count: true,
            yes: false,
            diff_only: false,
            clear_cache: false,
            init: false,
            max_tokens: None,
            signatures: false,
            structure: false,
            truncate: "smart".to_string(),
            visibility: "all".to_string(),
        };
        let config = Config::default();
        let prompter = MockPrompter::new(true, true);

        // Silence console output for the duration of the call only.
        // SAFETY: NOTE(review): mutating the process environment is only sound while no
        // other thread accesses it — confirm tests touching CB_SILENT are serialized.
        unsafe {
            std::env::set_var("CB_SILENT", "1");
        }
        let result = run_with_args(args, config, &prompter);
        unsafe {
            std::env::remove_var("CB_SILENT");
        }

        // Token counting returns before any prompt or file generation.
        assert!(result.is_ok());
    }
1108
1109 #[test]
1110 fn test_run_with_args_preview_and_token_count() {
1111 let temp_dir = tempdir().unwrap();
1112 let base_path = temp_dir.path();
1113
1114 fs::write(base_path.join("test.txt"), "content").unwrap();
1115
1116 let args = Args {
1117 input: base_path.to_string_lossy().to_string(),
1118 output: "test.md".to_string(),
1119 filter: vec![],
1120 ignore: vec![],
1121 line_numbers: false,
1122 preview: true,
1123 token_count: false,
1124 yes: false,
1125 diff_only: false,
1126 clear_cache: false,
1127 init: false,
1128 max_tokens: None,
1129 signatures: false,
1130 structure: false,
1131 truncate: "smart".to_string(),
1132 visibility: "all".to_string(),
1133 };
1134 let config = Config::default();
1135 let prompter = MockPrompter::new(true, true);
1136
1137 unsafe {
1138 std::env::set_var("CB_SILENT", "1");
1139 }
1140 let result = run_with_args(args, config, &prompter);
1141 unsafe {
1142 std::env::remove_var("CB_SILENT");
1143 }
1144
1145 assert!(result.is_ok());
1146 }
1147
1148 #[test]
1149 fn test_run_with_args_user_cancels_overwrite() {
1150 let temp_dir = tempdir().unwrap();
1151 let base_path = temp_dir.path();
1152 let output_path = temp_dir.path().join("existing.md");
1153
1154 fs::write(base_path.join("test.txt"), "content").unwrap();
1156 fs::write(&output_path, "existing content").unwrap();
1157
1158 let args = Args {
1159 input: base_path.to_string_lossy().to_string(),
1160 output: "test.md".to_string(),
1161 filter: vec![],
1162 ignore: vec!["target".to_string()],
1163 line_numbers: false,
1164 preview: false,
1165 token_count: false,
1166 yes: false,
1167 diff_only: false,
1168 clear_cache: false,
1169 init: false,
1170 max_tokens: None,
1171 signatures: false,
1172 structure: false,
1173 truncate: "smart".to_string(),
1174 visibility: "all".to_string(),
1175 };
1176 let config = Config::default();
1177 let prompter = MockPrompter::new(true, false); unsafe {
1180 std::env::set_var("CB_SILENT", "1");
1181 }
1182 let result = run_with_args(args, config, &prompter);
1183 unsafe {
1184 std::env::remove_var("CB_SILENT");
1185 }
1186
1187 assert!(result.is_err());
1188 assert!(result.unwrap_err().to_string().contains("cancelled"));
1189 }
1190
/// Checks that `run_with_args` fails with an error mentioning "cancelled" when
/// `yes` is off and the mock prompter is built with `(false, true)`.
///
/// NOTE(review): presumably the first `MockPrompter::new` argument is the answer
/// given to the processing prompt — confirm against `MockPrompter`.
#[test]
fn test_run_with_args_user_cancels_processing() {
    let workspace = tempdir().unwrap();
    let root = workspace.path();

    // Populate the directory with 105 small text files.
    // NOTE(review): presumably enough to cross the confirmation threshold; the
    // fixtures are `.txt` while the filter below is `rs`, so verify the prompt
    // does not depend on the filtered file count.
    (0..105)
        .map(|idx| root.join(format!("file{idx}.txt")))
        .for_each(|path| fs::write(path, "content").unwrap());

    let cli_args = Args {
        input: root.to_string_lossy().into_owned(),
        output: String::from("test.md"),
        filter: vec![String::from("rs")],
        ignore: Vec::new(),
        line_numbers: false,
        preview: false,
        token_count: false,
        yes: false,
        diff_only: false,
        clear_cache: false,
        init: false,
        max_tokens: None,
        signatures: false,
        structure: false,
        truncate: String::from("smart"),
        visibility: String::from("all"),
    };
    let settings = Config::default();
    let denying_prompter = MockPrompter::new(false, true);

    // `run_with_args` reads CB_SILENT to decide whether to run silently.
    // SAFETY: NOTE(review): environment mutation is process-global; this is only
    // sound if no other thread accesses the environment concurrently — confirm.
    unsafe {
        std::env::set_var("CB_SILENT", "1");
    }
    let outcome = run_with_args(cli_args, settings, &denying_prompter);
    // SAFETY: same assumption as for `set_var` above.
    unsafe {
        std::env::remove_var("CB_SILENT");
    }

    let failure = outcome.expect_err("run should have been aborted");
    assert!(failure.to_string().contains("cancelled"));
}
1233
/// Runs `run_with_args` with `yes: true` over a directory containing a single
/// text file and checks that the report file is written and mentions both the
/// report heading and the file name.
#[test]
fn test_run_with_args_with_yes_flag() {
    let workspace = tempdir().unwrap();
    let root = workspace.path();
    let report_path = root.join("test.md");

    fs::write(root.join("test.txt"), "Hello world").unwrap();

    let cli_args = Args {
        input: root.to_string_lossy().into_owned(),
        output: report_path.to_string_lossy().into_owned(),
        filter: Vec::new(),
        ignore: vec![String::from("ignored_dir")],
        line_numbers: false,
        preview: false,
        token_count: false,
        yes: true,
        diff_only: false,
        clear_cache: false,
        init: false,
        max_tokens: None,
        signatures: false,
        structure: false,
        truncate: String::from("smart"),
        visibility: String::from("all"),
    };
    let settings = Config::default();
    let accepting_prompter = MockPrompter::new(true, true);

    // `run_with_args` reads CB_SILENT to decide whether to run silently.
    // SAFETY: NOTE(review): environment mutation is process-global; this is only
    // sound if no other thread accesses the environment concurrently — confirm.
    unsafe {
        std::env::set_var("CB_SILENT", "1");
    }
    let outcome = run_with_args(cli_args, settings, &accepting_prompter);
    // SAFETY: same assumption as for `set_var` above.
    unsafe {
        std::env::remove_var("CB_SILENT");
    }

    assert!(outcome.is_ok(), "run failed: {outcome:?}");
    assert!(report_path.exists());

    let report = fs::read_to_string(&report_path).unwrap();
    for expected in ["Directory Structure Report", "test.txt"] {
        assert!(report.contains(expected), "report is missing {expected:?}");
    }
}
1279
/// Runs `run_with_args` with the `rs` and `md` filters and line numbers enabled,
/// then checks that the report mentions the two matching files, omits the JSON
/// file, and contains a numbered-line marker.
#[test]
fn test_run_with_args_with_filters() {
    let workspace = tempdir().unwrap();
    let root = workspace.path();
    let report_path = root.join("test.md");

    // One fixture per extension: two that match the filter and one that does not.
    let fixtures = [
        ("code.rs", "fn main() {}"),
        ("readme.md", "# README"),
        ("data.json", r#"{"key": "value"}"#),
    ];
    for (name, body) in fixtures {
        fs::write(root.join(name), body).unwrap();
    }

    let cli_args = Args {
        input: root.to_string_lossy().into_owned(),
        output: report_path.to_string_lossy().into_owned(),
        filter: vec![String::from("rs"), String::from("md")],
        ignore: Vec::new(),
        line_numbers: true,
        preview: false,
        token_count: false,
        yes: true,
        diff_only: false,
        clear_cache: false,
        init: false,
        max_tokens: None,
        signatures: false,
        structure: false,
        truncate: String::from("smart"),
        visibility: String::from("all"),
    };
    let settings = Config::default();
    let accepting_prompter = MockPrompter::new(true, true);

    // `run_with_args` reads CB_SILENT to decide whether to run silently.
    // SAFETY: NOTE(review): environment mutation is process-global; this is only
    // sound if no other thread accesses the environment concurrently — confirm.
    unsafe {
        std::env::set_var("CB_SILENT", "1");
    }
    let outcome = run_with_args(cli_args, settings, &accepting_prompter);
    // SAFETY: same assumption as for `set_var` above.
    unsafe {
        std::env::remove_var("CB_SILENT");
    }

    assert!(outcome.is_ok(), "run failed: {outcome:?}");

    let report = fs::read_to_string(&report_path).unwrap();
    for kept in ["code.rs", "readme.md"] {
        assert!(report.contains(kept), "report is missing {kept:?}");
    }
    // The file with the unlisted extension must not show up.
    assert!(!report.contains("data.json"));
    // Marker expected because `line_numbers` is enabled.
    assert!(report.contains(" 1 |"));
}
1328
/// Runs `run_with_args` with `secret.txt` in the ignore list and checks that the
/// run succeeds and the report mentions the file that is not ignored.
///
/// NOTE(review): the absence of `secret.txt` from the report is not asserted —
/// consider adding that check once the ignore matching is confirmed.
#[test]
fn test_run_with_args_with_ignores() {
    let workspace = tempdir().unwrap();
    let root = workspace.path();
    let report_path = root.join("ignored.md");

    for (name, body) in [
        ("important.txt", "important content"),
        ("secret.txt", "secret content"),
    ] {
        fs::write(root.join(name), body).unwrap();
    }

    let cli_args = Args {
        input: root.to_string_lossy().into_owned(),
        output: report_path.to_string_lossy().into_owned(),
        filter: Vec::new(),
        ignore: vec![String::from("secret.txt")],
        line_numbers: false,
        preview: false,
        token_count: false,
        yes: true,
        diff_only: false,
        clear_cache: false,
        init: false,
        max_tokens: None,
        signatures: false,
        structure: false,
        truncate: String::from("smart"),
        visibility: String::from("all"),
    };
    let settings = Config::default();
    let accepting_prompter = MockPrompter::new(true, true);

    // `run_with_args` reads CB_SILENT to decide whether to run silently.
    // SAFETY: NOTE(review): environment mutation is process-global; this is only
    // sound if no other thread accesses the environment concurrently — confirm.
    unsafe {
        std::env::set_var("CB_SILENT", "1");
    }
    let outcome = run_with_args(cli_args, settings, &accepting_prompter);
    // SAFETY: same assumption as for `set_var` above.
    unsafe {
        std::env::remove_var("CB_SILENT");
    }

    assert!(outcome.is_ok(), "run failed: {outcome:?}");

    let report = fs::read_to_string(&report_path).unwrap();
    assert!(report.contains("important.txt"));
}
1374
/// Runs `run_with_args` with `auto_diff` enabled (5 context lines) on a freshly
/// created temp directory and checks that the report is still written and
/// mentions the only file present.
///
/// NOTE(review): presumably no cached state exists for a brand-new temp
/// directory, which is what the test name relies on — confirm.
#[test]
fn test_auto_diff_without_previous_state() {
    let workspace = tempdir().unwrap();
    let root = workspace.path();
    let report_path = root.join("test.md");

    fs::write(root.join("new.txt"), "new content").unwrap();

    let cli_args = Args {
        input: root.to_string_lossy().into_owned(),
        output: report_path.to_string_lossy().into_owned(),
        filter: Vec::new(),
        ignore: Vec::new(),
        line_numbers: false,
        preview: false,
        token_count: false,
        yes: true,
        diff_only: false,
        clear_cache: false,
        init: false,
        max_tokens: None,
        signatures: false,
        structure: false,
        truncate: String::from("smart"),
        visibility: String::from("all"),
    };
    // Only the two diff-related settings deviate from the defaults.
    let settings = Config {
        diff_context_lines: Some(5),
        auto_diff: Some(true),
        ..Config::default()
    };
    let accepting_prompter = MockPrompter::new(true, true);

    // `run_with_args` reads CB_SILENT to decide whether to run silently.
    // SAFETY: NOTE(review): environment mutation is process-global; this is only
    // sound if no other thread accesses the environment concurrently — confirm.
    unsafe {
        std::env::set_var("CB_SILENT", "1");
    }
    let outcome = run_with_args(cli_args, settings, &accepting_prompter);
    // SAFETY: same assumption as for `set_var` above.
    unsafe {
        std::env::remove_var("CB_SILENT");
    }

    assert!(outcome.is_ok(), "run failed: {outcome:?}");
    assert!(report_path.exists());

    let report = fs::read_to_string(&report_path).unwrap();
    assert!(report.contains("new.txt"));
}
1423
/// Points the output at `nested/output/result.md` inside the temp directory
/// (neither directory is created by the test) and checks that after a
/// successful `run_with_args` both the file and its parent directory exist.
#[test]
fn test_run_creates_output_directory() {
    let workspace = tempdir().unwrap();
    let root = workspace.path();
    let nested_dir = root.join("nested").join("output");
    let report_path = nested_dir.join("result.md");

    fs::write(root.join("test.txt"), "content").unwrap();

    let cli_args = Args {
        input: root.to_string_lossy().into_owned(),
        output: report_path.to_string_lossy().into_owned(),
        filter: Vec::new(),
        ignore: Vec::new(),
        line_numbers: false,
        preview: false,
        token_count: false,
        yes: true,
        diff_only: false,
        clear_cache: false,
        init: false,
        max_tokens: None,
        signatures: false,
        structure: false,
        truncate: String::from("smart"),
        visibility: String::from("all"),
    };
    let settings = Config::default();
    let accepting_prompter = MockPrompter::new(true, true);

    // `run_with_args` reads CB_SILENT to decide whether to run silently.
    // SAFETY: NOTE(review): environment mutation is process-global; this is only
    // sound if no other thread accesses the environment concurrently — confirm.
    unsafe {
        std::env::set_var("CB_SILENT", "1");
    }
    let outcome = run_with_args(cli_args, settings, &accepting_prompter);
    // SAFETY: same assumption as for `set_var` above.
    unsafe {
        std::env::remove_var("CB_SILENT");
    }

    assert!(outcome.is_ok(), "run failed: {outcome:?}");
    assert!(report_path.exists(), "report file was not written");
    assert!(nested_dir.exists(), "output directory was not created");
}
1466
/// Calls `generate_markdown_with_diff` directly with `None` as the comparison
/// argument and a default `DiffConfig`, and checks that the returned markdown
/// contains the report heading and the single source file's name.
#[test]
fn test_generate_markdown_with_diff_no_comparison() {
    let workspace = tempdir().unwrap();
    let root = workspace.path();

    fs::write(root.join("test.rs"), "fn main() {}").unwrap();

    let files = collect_files(root, &[], &[], &[]).unwrap();
    let file_tree = build_file_tree(&files, root);
    let settings = Config::default();
    let state = ProjectState::from_files(&files, root, &settings, false).unwrap();

    let cli_args = Args {
        input: root.to_string_lossy().into_owned(),
        output: String::from("test.md"),
        filter: Vec::new(),
        ignore: Vec::new(),
        line_numbers: false,
        preview: false,
        token_count: false,
        yes: false,
        diff_only: false,
        clear_cache: false,
        init: false,
        max_tokens: None,
        signatures: false,
        structure: false,
        truncate: String::from("smart"),
        visibility: String::from("all"),
    };

    let diff_settings = DiffConfig::default();

    // Express every collected path relative to the root; a path that is not
    // under the root is kept unchanged.
    let mut relative_paths: Vec<PathBuf> = Vec::new();
    for entry in files.iter() {
        let full = entry.path();
        let relative = match full.strip_prefix(root) {
            Ok(stripped) => stripped,
            Err(_) => full,
        };
        relative_paths.push(relative.to_path_buf());
    }

    let rendered = generate_markdown_with_diff(
        &state,
        None,
        &cli_args,
        &file_tree,
        &diff_settings,
        &relative_paths,
    );

    let markdown = rendered.expect("markdown generation should succeed");
    assert!(markdown.contains("Directory Structure Report"));
    assert!(markdown.contains("test.rs"));
}
1524}