context_builder/
lib.rs

1use chrono::Utc;
2use clap::{CommandFactory, Parser};
3
4use std::collections::{HashMap, HashSet};
5use std::fs;
6use std::io::{self, Write};
7use std::path::{Path, PathBuf};
8use std::time::Instant;
9use tempfile::NamedTempFile;
10
11pub mod cli;
12pub mod config;
13pub mod diff;
14pub mod file_utils;
15pub mod markdown;
16pub mod token_count;
17pub mod tree;
18
19use cli::Args;
20use config::load_config;
21use diff::{PerFileStatus, diff_file_contents, render_per_file_diffs};
22use file_utils::{collect_files, confirm_overwrite, confirm_processing};
23use markdown::generate_markdown;
24use token_count::{count_file_tokens, count_tree_tokens, estimate_tokens};
25use tree::{build_file_tree, print_tree};
26
/// Abstraction over the interactive yes/no questions asked during a run.
///
/// Implementations decide how a confirmation is obtained, which lets callers
/// of [`run_with_args`] substitute their own answers for the default prompts.
pub trait Prompter {
    /// Asks whether processing of `file_count` files should go ahead.
    ///
    /// Returns `Ok(true)` to continue and `Ok(false)` to cancel.
    fn confirm_processing(&self, file_count: usize) -> io::Result<bool>;

    /// Asks whether the existing file at `file_path` may be overwritten.
    ///
    /// Returns `Ok(true)` to overwrite and `Ok(false)` to cancel.
    fn confirm_overwrite(&self, file_path: &str) -> io::Result<bool>;
}
31
32pub struct DefaultPrompter;
33
34impl Prompter for DefaultPrompter {
35    fn confirm_processing(&self, file_count: usize) -> io::Result<bool> {
36        confirm_processing(file_count)
37    }
38    fn confirm_overwrite(&self, file_path: &str) -> io::Result<bool> {
39        confirm_overwrite(file_path)
40    }
41}
42
43pub fn run_with_args(args: Args, prompter: &impl Prompter) -> io::Result<()> {
44    let start_time = Instant::now();
45
46    let silent = std::env::var("CB_SILENT")
47        .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
48        .unwrap_or(false);
49
50    let base_path = Path::new(&args.input);
51
52    if !base_path.exists() || !base_path.is_dir() {
53        if !silent {
54            eprintln!(
55                "Error: The specified input directory '{}' does not exist or is not a directory.",
56                args.input
57            );
58        }
59        return Ok(());
60    }
61
62    let config = load_config().unwrap_or_default();
63    // Expose configured diff context lines (if provided) to the diff generator through env
64    if let Some(diff_ctx) = config.diff_context_lines
65        && std::env::var("CB_DIFF_CONTEXT_LINES").is_err()
66    {
67        unsafe {
68            std::env::set_var("CB_DIFF_CONTEXT_LINES", diff_ctx.to_string());
69        }
70    }
71
72    if !args.preview
73        && !args.token_count
74        && Path::new(&args.output).exists()
75        && !args.yes
76        && !prompter.confirm_overwrite(&args.output)?
77    {
78        if !silent {
79            println!("Operation cancelled.");
80        }
81        return Ok(());
82    }
83
84    let files = collect_files(base_path, &args.filter, &args.ignore)?;
85    let file_tree = build_file_tree(&files, base_path);
86
87    if args.preview {
88        if !silent {
89            println!("\n# File Tree Structure (Preview)\n");
90            print_tree(&file_tree, 0);
91        }
92        if !args.token_count {
93            return Ok(());
94        }
95    }
96
97    if args.token_count {
98        if !silent {
99            println!("\n# Token Count Estimation\n");
100            let mut total_tokens = 0;
101            total_tokens += estimate_tokens("# Directory Structure Report\n\n");
102            if !args.filter.is_empty() {
103                total_tokens += estimate_tokens(&format!(
104                    "This document contains files from the `{}` directory with extensions: {} \n",
105                    args.input,
106                    args.filter.join(", ")
107                ));
108            } else {
109                total_tokens += estimate_tokens(&format!(
110                    "This document contains all files from the `{}` directory, optimized for LLM consumption.\n",
111                    args.input
112                ));
113            }
114            if !args.ignore.is_empty() {
115                total_tokens += estimate_tokens(&format!(
116                    "Custom ignored patterns: {} \n",
117                    args.ignore.join(", ")
118                ));
119            }
120            total_tokens += estimate_tokens(&format!(
121                "Processed at: {}\n\n",
122                Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
123            ));
124            total_tokens += estimate_tokens("## File Tree Structure\n\n");
125            let tree_tokens = count_tree_tokens(&file_tree, 0);
126            total_tokens += tree_tokens;
127            let file_tokens: usize = files
128                .iter()
129                .map(|entry| count_file_tokens(base_path, entry, args.line_numbers))
130                .sum();
131            total_tokens += file_tokens;
132            println!("Estimated total tokens: {}", total_tokens);
133            println!("File tree tokens: {}", tree_tokens);
134            println!("File content tokens: {}", file_tokens);
135        }
136        return Ok(());
137    }
138
139    if !args.yes && !prompter.confirm_processing(files.len())? {
140        if !silent {
141            println!("Operation cancelled.");
142        }
143        return Ok(());
144    }
145
146    if config.auto_diff.unwrap_or(false) && config.timestamped_output.unwrap_or(false) {
147        // 1. Generate current canonical (no diff) into temp file
148        let output_path = Path::new(&args.output);
149        let temp_file = NamedTempFile::new()?;
150
151        generate_markdown(
152            temp_file.path().to_str().unwrap(),
153            &args.input,
154            &args.filter,
155            &args.ignore,
156            &file_tree,
157            &files,
158            base_path,
159            args.line_numbers,
160        )?;
161
162        // 2. Load previous canonical (if any)
163        let cache_dir = Path::new(".context-builder").join("cache");
164        if !cache_dir.exists() {
165            let _ = fs::create_dir_all(&cache_dir);
166        }
167        let cache_file = cache_dir.join("last_canonical.md");
168        let previous_canonical = fs::read_to_string(&cache_file).unwrap_or_default();
169        let new_canonical = fs::read_to_string(temp_file.path())?;
170
171        // 3. Extract per-file pure contents (only code blocks) for both versions
172        fn extract_file_contents(text: &str) -> (String, String, HashMap<String, String>) {
173            let mut prefix_end = text.len();
174            if let Some(idx) = text.find("\n### File: `") {
175                prefix_end = idx;
176            }
177            let (prefix, rest) = text.split_at(prefix_end);
178            let mut files_map: HashMap<String, String> = HashMap::new();
179            let files_raw = rest.trim_start().to_string();
180
181            let mut current_path: Option<String> = None;
182            let mut in_code = false;
183            let mut current_lines: Vec<String> = Vec::new();
184
185            fn strip_line_number(line: &str) -> &str {
186                let trimmed = line.trim_start();
187                if let Some(pipe_idx) = trimmed.find('|') {
188                    let (left, right) = trimmed.split_at(pipe_idx);
189                    if left.trim().chars().all(|c| c.is_ascii_digit()) {
190                        return right.trim_start_matches('|').trim_start();
191                    }
192                }
193                line
194            }
195
196            for line in rest.lines() {
197                if line.starts_with("### File: `") {
198                    if let Some(p) = current_path.take() {
199                        files_map.insert(p, current_lines.join("\n"));
200                        current_lines.clear();
201                    }
202                    if let Some(after) = line.strip_prefix("### File: `")
203                        && let Some(end) = after.find('`')
204                    {
205                        current_path = Some(after[..end].to_string());
206                    }
207                    in_code = false;
208                    continue;
209                }
210
211                if line.starts_with("```") {
212                    in_code = !in_code;
213                    continue;
214                }
215
216                if in_code {
217                    current_lines.push(strip_line_number(line).to_string());
218                }
219            }
220
221            if let Some(p) = current_path.take() {
222                files_map.insert(p, current_lines.join("\n"));
223            }
224
225            (prefix.trim_end().to_string(), files_raw, files_map)
226        }
227
228        let (_prev_prefix, _prev_files_raw, prev_map) = extract_file_contents(&previous_canonical);
229        let (new_prefix, new_files_raw, new_map) = extract_file_contents(&new_canonical);
230
231        // 4. Compute per-file diffs (skip unchanged)
232        let per_file_diffs = diff_file_contents(&prev_map, &new_map, true, None);
233
234        // 5. Partition changes
235        let mut added_paths: HashSet<&str> = HashSet::new();
236        let mut removed_paths: HashSet<&str> = HashSet::new();
237        let mut modified_paths: HashSet<&str> = HashSet::new();
238
239        for d in &per_file_diffs {
240            match d.status {
241                PerFileStatus::Added => {
242                    added_paths.insert(d.path.as_str());
243                }
244                PerFileStatus::Removed => {
245                    removed_paths.insert(d.path.as_str());
246                }
247                PerFileStatus::Modified => {
248                    modified_paths.insert(d.path.as_str());
249                }
250                PerFileStatus::Unchanged => {}
251            }
252        }
253
254        // 6. Prepare Files section with annotations for added files
255        // We only annotate the display; added files produce no diff section.
256        let mut files_section = new_files_raw.trim_start().to_string();
257        if !added_paths.is_empty() {
258            // For safety do replacements on a line-by-line rebuild to avoid nested replacements.
259            let mut rebuilt = String::new();
260            let lines = files_section.lines().peekable();
261            for line in lines {
262                if let Some(after) = line.strip_prefix("### File: `")
263                    && let Some(end) = after.find('`')
264                {
265                    let path = &after[..end];
266                    rebuilt.push_str(line);
267                    rebuilt.push('\n');
268                    // The original generator emits a blank line after heading; we add status before metadata
269                    if added_paths.contains(path) {
270                        rebuilt.push('\n');
271                        rebuilt.push_str("_Status: Added_\n");
272                    }
273                    continue;
274                }
275                rebuilt.push_str(line);
276                rebuilt.push('\n');
277            }
278            files_section = rebuilt;
279        }
280
281        // 7. Build final document
282        let mut final_doc = String::new();
283        final_doc.push_str(&new_prefix);
284        final_doc.push_str("\n\n");
285
286        // Change Summary always shows additions/removals/modifications if any
287        if !(added_paths.is_empty() && removed_paths.is_empty() && modified_paths.is_empty()) {
288            final_doc.push_str("## Change Summary\n\n");
289            for p in added_paths.iter().copied().collect::<Vec<_>>() {
290                final_doc.push_str(&format!("- Added: `{}`\n", p));
291            }
292            for p in removed_paths.iter().copied().collect::<Vec<_>>() {
293                final_doc.push_str(&format!("- Removed: `{}`\n", p));
294            }
295            for p in modified_paths.iter().copied().collect::<Vec<_>>() {
296                final_doc.push_str(&format!("- Modified: `{}`\n", p));
297            }
298            final_doc.push('\n');
299        }
300
301        // File Differences: ONLY modified files (no added / removed)
302        let modified_diffs: Vec<_> = per_file_diffs
303            .iter()
304            .filter(|d| matches!(d.status, PerFileStatus::Modified))
305            .collect();
306
307        if !modified_diffs.is_empty() {
308            final_doc.push_str("## File Differences\n\n");
309            // Render only modified diffs
310            final_doc.push_str(&render_per_file_diffs(
311                &modified_diffs
312                    .iter()
313                    .map(|d| (*d).clone())
314                    .collect::<Vec<_>>(),
315            ));
316            final_doc.push('\n');
317        }
318
319        // Only include full file bodies when not in diff-only mode
320        if !args.diff_only && !files_section.is_empty() {
321            final_doc.push_str("## Files\n\n");
322
323            final_doc.push_str(&files_section);
324
325            if !final_doc.ends_with('\n') {
326                final_doc.push('\n');
327            }
328        }
329
330        // 8. Write output
331        let mut final_output = fs::File::create(output_path)?;
332        final_output.write_all(final_doc.as_bytes())?;
333
334        // 9. Update canonical cache
335        if let Err(e) = fs::write(&cache_file, &new_canonical)
336            && !silent
337        {
338            eprintln!("Warning: failed to update canonical cache: {e}");
339        }
340
341        let duration = start_time.elapsed();
342        if !silent {
343            if modified_diffs.is_empty() {
344                println!(
345                    "Documentation created successfully (no modified file content): {}",
346                    args.output
347                );
348            } else {
349                println!(
350                    "Documentation created successfully with modified file diffs: {}",
351                    args.output
352                );
353            }
354            println!("Processing time: {:.2?}", duration);
355        }
356        return Ok(());
357    }
358
359    // Standard (non auto-diff) generation
360    generate_markdown(
361        &args.output,
362        &args.input,
363        &args.filter,
364        &args.ignore,
365        &file_tree,
366        &files,
367        base_path,
368        args.line_numbers,
369    )?;
370
371    let duration = start_time.elapsed();
372    if !silent {
373        println!("Documentation created successfully: {}", args.output);
374        println!("Processing time: {:.2?}", duration);
375    }
376
377    Ok(())
378}
379
380pub fn run() -> io::Result<()> {
381    env_logger::init();
382    let mut args = Args::parse();
383    let config = load_config();
384
385    if std::env::args().len() == 1 && config.is_none() {
386        Args::command().print_help()?;
387        return Ok(());
388    }
389
390    if let Some(config) = config {
391        if args.output == "output.md"
392            && let Some(output) = config.output
393        {
394            args.output = output;
395        }
396        if args.filter.is_empty()
397            && let Some(filter) = config.filter
398        {
399            args.filter = filter;
400        }
401        if args.ignore.is_empty()
402            && let Some(ignore) = config.ignore
403        {
404            args.ignore = ignore;
405        }
406        if !args.line_numbers
407            && let Some(line_numbers) = config.line_numbers
408        {
409            args.line_numbers = line_numbers;
410        }
411        if !args.preview
412            && let Some(preview) = config.preview
413        {
414            args.preview = preview;
415        }
416        if !args.token_count
417            && let Some(token_count) = config.token_count
418        {
419            args.token_count = token_count;
420        }
421        if !args.yes
422            && let Some(yes) = config.yes
423        {
424            args.yes = yes;
425        }
426
427        let mut output_folder_path: Option<PathBuf> = None;
428        if let Some(output_folder) = config.output_folder {
429            let mut path = PathBuf::from(output_folder.clone());
430            path.push(&args.output);
431            args.output = path.to_str().unwrap().to_string();
432            output_folder_path = Some(PathBuf::from(output_folder));
433        }
434
435        if let Some(true) = config.timestamped_output {
436            let timestamp = Utc::now().format("%Y%m%d%H%M%S").to_string();
437
438            let path = Path::new(&args.output);
439
440            let stem = path.file_stem().unwrap().to_str().unwrap();
441
442            let extension = path.extension().unwrap().to_str().unwrap();
443
444            let new_filename = format!("{}_{}.{}", stem, timestamp, extension);
445
446            if let Some(output_folder) = output_folder_path {
447                args.output = output_folder
448                    .join(new_filename)
449                    .to_str()
450                    .unwrap()
451                    .to_string();
452            } else {
453                let new_path = path.with_file_name(new_filename);
454
455                args.output = new_path.to_str().unwrap().to_string();
456            }
457        }
458
459        // Apply diff_only from config (CLI flag still has precedence if user supplied --diff-only)
460        if let Some(true) = config.diff_only {
461            args.diff_only = true;
462        }
463    }
464    run_with_args(args, &DefaultPrompter)
465}