1use chrono::Utc;
2use clap::{CommandFactory, Parser};
3
4use std::collections::{HashMap, HashSet};
5use std::fs;
6use std::io::{self, Write};
7use std::path::{Path, PathBuf};
8use std::time::Instant;
9use tempfile::NamedTempFile;
10
11pub mod cli;
12pub mod config;
13pub mod diff;
14pub mod file_utils;
15pub mod markdown;
16pub mod token_count;
17pub mod tree;
18
19use cli::Args;
20use config::load_config;
21use diff::{PerFileStatus, diff_file_contents, render_per_file_diffs};
22use file_utils::{collect_files, confirm_overwrite, confirm_processing};
23use markdown::generate_markdown;
24use token_count::{count_file_tokens, count_tree_tokens, estimate_tokens};
25use tree::{build_file_tree, print_tree};
26
/// Abstraction over the yes/no confirmations requested while running the tool.
///
/// [`run_with_args`] only talks to the user through this trait, so callers can
/// substitute their own implementation (for example a non-interactive one).
pub trait Prompter {
    /// Asks whether processing should go ahead for `file_count` collected files.
    ///
    /// Returns `Ok(true)` to continue and `Ok(false)` to cancel.
    fn confirm_processing(&self, file_count: usize) -> io::Result<bool>;

    /// Asks whether the already existing output at `file_path` may be overwritten.
    ///
    /// Returns `Ok(true)` to overwrite and `Ok(false)` to cancel.
    fn confirm_overwrite(&self, file_path: &str) -> io::Result<bool>;
}
31
/// The stock [`Prompter`] used by [`run`].
///
/// It is a stateless unit type, so it derives the cheap standard traits to be
/// printable in diagnostics and freely copyable/constructible by callers.
#[derive(Debug, Clone, Copy, Default)]
pub struct DefaultPrompter;
33
34impl Prompter for DefaultPrompter {
35 fn confirm_processing(&self, file_count: usize) -> io::Result<bool> {
36 confirm_processing(file_count)
37 }
38 fn confirm_overwrite(&self, file_path: &str) -> io::Result<bool> {
39 confirm_overwrite(file_path)
40 }
41}
42
43pub fn run_with_args(args: Args, prompter: &impl Prompter) -> io::Result<()> {
44 let start_time = Instant::now();
45
46 let silent = std::env::var("CB_SILENT")
47 .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
48 .unwrap_or(false);
49
50 let base_path = Path::new(&args.input);
51
52 if !base_path.exists() || !base_path.is_dir() {
53 if !silent {
54 eprintln!(
55 "Error: The specified input directory '{}' does not exist or is not a directory.",
56 args.input
57 );
58 }
59 return Ok(());
60 }
61
62 let config = load_config().unwrap_or_default();
63 if let Some(diff_ctx) = config.diff_context_lines
65 && std::env::var("CB_DIFF_CONTEXT_LINES").is_err()
66 {
67 unsafe {
68 std::env::set_var("CB_DIFF_CONTEXT_LINES", diff_ctx.to_string());
69 }
70 }
71
72 if !args.preview
73 && !args.token_count
74 && Path::new(&args.output).exists()
75 && !args.yes
76 && !prompter.confirm_overwrite(&args.output)?
77 {
78 if !silent {
79 println!("Operation cancelled.");
80 }
81 return Ok(());
82 }
83
84 let files = collect_files(base_path, &args.filter, &args.ignore)?;
85 let file_tree = build_file_tree(&files, base_path);
86
87 if args.preview {
88 if !silent {
89 println!("\n# File Tree Structure (Preview)\n");
90 print_tree(&file_tree, 0);
91 }
92 if !args.token_count {
93 return Ok(());
94 }
95 }
96
97 if args.token_count {
98 if !silent {
99 println!("\n# Token Count Estimation\n");
100 let mut total_tokens = 0;
101 total_tokens += estimate_tokens("# Directory Structure Report\n\n");
102 if !args.filter.is_empty() {
103 total_tokens += estimate_tokens(&format!(
104 "This document contains files from the `{}` directory with extensions: {} \n",
105 args.input,
106 args.filter.join(", ")
107 ));
108 } else {
109 total_tokens += estimate_tokens(&format!(
110 "This document contains all files from the `{}` directory, optimized for LLM consumption.\n",
111 args.input
112 ));
113 }
114 if !args.ignore.is_empty() {
115 total_tokens += estimate_tokens(&format!(
116 "Custom ignored patterns: {} \n",
117 args.ignore.join(", ")
118 ));
119 }
120 total_tokens += estimate_tokens(&format!(
121 "Processed at: {}\n\n",
122 Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
123 ));
124 total_tokens += estimate_tokens("## File Tree Structure\n\n");
125 let tree_tokens = count_tree_tokens(&file_tree, 0);
126 total_tokens += tree_tokens;
127 let file_tokens: usize = files
128 .iter()
129 .map(|entry| count_file_tokens(base_path, entry, args.line_numbers))
130 .sum();
131 total_tokens += file_tokens;
132 println!("Estimated total tokens: {}", total_tokens);
133 println!("File tree tokens: {}", tree_tokens);
134 println!("File content tokens: {}", file_tokens);
135 }
136 return Ok(());
137 }
138
139 if !args.yes && !prompter.confirm_processing(files.len())? {
140 if !silent {
141 println!("Operation cancelled.");
142 }
143 return Ok(());
144 }
145
146 if config.auto_diff.unwrap_or(false) && config.timestamped_output.unwrap_or(false) {
147 let output_path = Path::new(&args.output);
149 let temp_file = NamedTempFile::new()?;
150
151 generate_markdown(
152 temp_file.path().to_str().unwrap(),
153 &args.input,
154 &args.filter,
155 &args.ignore,
156 &file_tree,
157 &files,
158 base_path,
159 args.line_numbers,
160 )?;
161
162 let cache_dir = Path::new(".context-builder").join("cache");
164 if !cache_dir.exists() {
165 let _ = fs::create_dir_all(&cache_dir);
166 }
167 let cache_file = cache_dir.join("last_canonical.md");
168 let previous_canonical = fs::read_to_string(&cache_file).unwrap_or_default();
169 let new_canonical = fs::read_to_string(temp_file.path())?;
170
171 fn extract_file_contents(text: &str) -> (String, String, HashMap<String, String>) {
173 let mut prefix_end = text.len();
174 if let Some(idx) = text.find("\n### File: `") {
175 prefix_end = idx;
176 }
177 let (prefix, rest) = text.split_at(prefix_end);
178 let mut files_map: HashMap<String, String> = HashMap::new();
179 let files_raw = rest.trim_start().to_string();
180
181 let mut current_path: Option<String> = None;
182 let mut in_code = false;
183 let mut current_lines: Vec<String> = Vec::new();
184
185 fn strip_line_number(line: &str) -> &str {
186 let trimmed = line.trim_start();
187 if let Some(pipe_idx) = trimmed.find('|') {
188 let (left, right) = trimmed.split_at(pipe_idx);
189 if left.trim().chars().all(|c| c.is_ascii_digit()) {
190 return right.trim_start_matches('|').trim_start();
191 }
192 }
193 line
194 }
195
196 for line in rest.lines() {
197 if line.starts_with("### File: `") {
198 if let Some(p) = current_path.take() {
199 files_map.insert(p, current_lines.join("\n"));
200 current_lines.clear();
201 }
202 if let Some(after) = line.strip_prefix("### File: `")
203 && let Some(end) = after.find('`')
204 {
205 current_path = Some(after[..end].to_string());
206 }
207 in_code = false;
208 continue;
209 }
210
211 if line.starts_with("```") {
212 in_code = !in_code;
213 continue;
214 }
215
216 if in_code {
217 current_lines.push(strip_line_number(line).to_string());
218 }
219 }
220
221 if let Some(p) = current_path.take() {
222 files_map.insert(p, current_lines.join("\n"));
223 }
224
225 (prefix.trim_end().to_string(), files_raw, files_map)
226 }
227
228 let (_prev_prefix, _prev_files_raw, prev_map) = extract_file_contents(&previous_canonical);
229 let (new_prefix, new_files_raw, new_map) = extract_file_contents(&new_canonical);
230
231 let per_file_diffs = diff_file_contents(&prev_map, &new_map, true, None);
233
234 let mut added_paths: HashSet<&str> = HashSet::new();
236 let mut removed_paths: HashSet<&str> = HashSet::new();
237 let mut modified_paths: HashSet<&str> = HashSet::new();
238
239 for d in &per_file_diffs {
240 match d.status {
241 PerFileStatus::Added => {
242 added_paths.insert(d.path.as_str());
243 }
244 PerFileStatus::Removed => {
245 removed_paths.insert(d.path.as_str());
246 }
247 PerFileStatus::Modified => {
248 modified_paths.insert(d.path.as_str());
249 }
250 PerFileStatus::Unchanged => {}
251 }
252 }
253
254 let mut files_section = new_files_raw.trim_start().to_string();
257 if !added_paths.is_empty() {
258 let mut rebuilt = String::new();
260 let lines = files_section.lines().peekable();
261 for line in lines {
262 if let Some(after) = line.strip_prefix("### File: `")
263 && let Some(end) = after.find('`')
264 {
265 let path = &after[..end];
266 rebuilt.push_str(line);
267 rebuilt.push('\n');
268 if added_paths.contains(path) {
270 rebuilt.push('\n');
271 rebuilt.push_str("_Status: Added_\n");
272 }
273 continue;
274 }
275 rebuilt.push_str(line);
276 rebuilt.push('\n');
277 }
278 files_section = rebuilt;
279 }
280
281 let mut final_doc = String::new();
283 final_doc.push_str(&new_prefix);
284 final_doc.push_str("\n\n");
285
286 if !(added_paths.is_empty() && removed_paths.is_empty() && modified_paths.is_empty()) {
288 final_doc.push_str("## Change Summary\n\n");
289 for p in added_paths.iter().copied().collect::<Vec<_>>() {
290 final_doc.push_str(&format!("- Added: `{}`\n", p));
291 }
292 for p in removed_paths.iter().copied().collect::<Vec<_>>() {
293 final_doc.push_str(&format!("- Removed: `{}`\n", p));
294 }
295 for p in modified_paths.iter().copied().collect::<Vec<_>>() {
296 final_doc.push_str(&format!("- Modified: `{}`\n", p));
297 }
298 final_doc.push('\n');
299 }
300
301 let modified_diffs: Vec<_> = per_file_diffs
303 .iter()
304 .filter(|d| matches!(d.status, PerFileStatus::Modified))
305 .collect();
306
307 if !modified_diffs.is_empty() {
308 final_doc.push_str("## File Differences\n\n");
309 final_doc.push_str(&render_per_file_diffs(
311 &modified_diffs
312 .iter()
313 .map(|d| (*d).clone())
314 .collect::<Vec<_>>(),
315 ));
316 final_doc.push('\n');
317 }
318
319 if !args.diff_only && !files_section.is_empty() {
321 final_doc.push_str("## Files\n\n");
322
323 final_doc.push_str(&files_section);
324
325 if !final_doc.ends_with('\n') {
326 final_doc.push('\n');
327 }
328 }
329
330 let mut final_output = fs::File::create(output_path)?;
332 final_output.write_all(final_doc.as_bytes())?;
333
334 if let Err(e) = fs::write(&cache_file, &new_canonical)
336 && !silent
337 {
338 eprintln!("Warning: failed to update canonical cache: {e}");
339 }
340
341 let duration = start_time.elapsed();
342 if !silent {
343 if modified_diffs.is_empty() {
344 println!(
345 "Documentation created successfully (no modified file content): {}",
346 args.output
347 );
348 } else {
349 println!(
350 "Documentation created successfully with modified file diffs: {}",
351 args.output
352 );
353 }
354 println!("Processing time: {:.2?}", duration);
355 }
356 return Ok(());
357 }
358
359 generate_markdown(
361 &args.output,
362 &args.input,
363 &args.filter,
364 &args.ignore,
365 &file_tree,
366 &files,
367 base_path,
368 args.line_numbers,
369 )?;
370
371 let duration = start_time.elapsed();
372 if !silent {
373 println!("Documentation created successfully: {}", args.output);
374 println!("Processing time: {:.2?}", duration);
375 }
376
377 Ok(())
378}
379
380pub fn run() -> io::Result<()> {
381 env_logger::init();
382 let mut args = Args::parse();
383 let config = load_config();
384
385 if std::env::args().len() == 1 && config.is_none() {
386 Args::command().print_help()?;
387 return Ok(());
388 }
389
390 if let Some(config) = config {
391 if args.output == "output.md"
392 && let Some(output) = config.output
393 {
394 args.output = output;
395 }
396 if args.filter.is_empty()
397 && let Some(filter) = config.filter
398 {
399 args.filter = filter;
400 }
401 if args.ignore.is_empty()
402 && let Some(ignore) = config.ignore
403 {
404 args.ignore = ignore;
405 }
406 if !args.line_numbers
407 && let Some(line_numbers) = config.line_numbers
408 {
409 args.line_numbers = line_numbers;
410 }
411 if !args.preview
412 && let Some(preview) = config.preview
413 {
414 args.preview = preview;
415 }
416 if !args.token_count
417 && let Some(token_count) = config.token_count
418 {
419 args.token_count = token_count;
420 }
421 if !args.yes
422 && let Some(yes) = config.yes
423 {
424 args.yes = yes;
425 }
426
427 let mut output_folder_path: Option<PathBuf> = None;
428 if let Some(output_folder) = config.output_folder {
429 let mut path = PathBuf::from(output_folder.clone());
430 path.push(&args.output);
431 args.output = path.to_str().unwrap().to_string();
432 output_folder_path = Some(PathBuf::from(output_folder));
433 }
434
435 if let Some(true) = config.timestamped_output {
436 let timestamp = Utc::now().format("%Y%m%d%H%M%S").to_string();
437
438 let path = Path::new(&args.output);
439
440 let stem = path.file_stem().unwrap().to_str().unwrap();
441
442 let extension = path.extension().unwrap().to_str().unwrap();
443
444 let new_filename = format!("{}_{}.{}", stem, timestamp, extension);
445
446 if let Some(output_folder) = output_folder_path {
447 args.output = output_folder
448 .join(new_filename)
449 .to_str()
450 .unwrap()
451 .to_string();
452 } else {
453 let new_path = path.with_file_name(new_filename);
454
455 args.output = new_path.to_str().unwrap().to_string();
456 }
457 }
458
459 if let Some(true) = config.diff_only {
461 args.diff_only = true;
462 }
463 }
464 run_with_args(args, &DefaultPrompter)
465}