1use crate::core::cache::FileCache;
4use crate::core::walker::FileInfo;
5use crate::utils::file_ext::FileType;
6use anyhow::Result;
7use std::collections::HashMap;
8use std::path::Path;
9use std::sync::Arc;
10
/// Settings that control what `generate_markdown` emits and how it is laid out.
#[derive(Debug, Clone)]
pub struct DigestOptions {
    /// Optional token limit carried over from the CLI configuration.
    /// The rendering functions in this file do not read it.
    pub max_tokens: Option<usize>,
    /// Emit the "## File Structure" section containing the directory tree.
    pub include_tree: bool,
    /// Emit the "## Statistics" section (file count, total size, per-type counts).
    pub include_stats: bool,
    /// Emit file sections grouped under "## <Type> Files" headings instead of
    /// one flat list.
    pub group_by_type: bool,
    /// Order files by descending priority, breaking ties by relative path.
    pub sort_by_priority: bool,
    /// Per-file header line; every `{path}` placeholder is substituted.
    pub file_header_template: String,
    /// Document header line; every `{directory}` placeholder is substituted.
    /// An empty template suppresses the header entirely.
    pub doc_header_template: String,
    /// Emit the "## Table of Contents" section.
    pub include_toc: bool,
    /// Append size and file type to tree entries and to per-file headers.
    pub enhanced_context: bool,
}
33
34impl DigestOptions {
35 pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
37 Ok(DigestOptions {
38 max_tokens: config.max_tokens,
39 include_tree: true,
40 include_stats: true,
41 group_by_type: false,
42 sort_by_priority: true,
43 file_header_template: "## {path}".to_string(),
44 doc_header_template: "# Code Digest: {directory}".to_string(),
45 include_toc: true,
46 enhanced_context: config.enhanced_context,
47 })
48 }
49}
50
51impl Default for DigestOptions {
52 fn default() -> Self {
53 DigestOptions {
54 max_tokens: None,
55 include_tree: true,
56 include_stats: true,
57 group_by_type: false,
58 sort_by_priority: true,
59 file_header_template: "## {path}".to_string(),
60 doc_header_template: "# Code Digest: {directory}".to_string(),
61 include_toc: true,
62 enhanced_context: false,
63 }
64 }
65}
66
67fn estimate_output_size(files: &[FileInfo], options: &DigestOptions, cache: &FileCache) -> usize {
69 let mut size = 0;
70
71 if !options.doc_header_template.is_empty() {
73 size += options.doc_header_template.len() + 50; }
75
76 if options.include_stats {
78 size += 500; size += files.len() * 50; }
81
82 if options.include_tree {
84 size += 100; size += files.len() * 100; }
87
88 if options.include_toc {
90 size += 50; size += files.len() * 100; }
93
94 for file in files {
96 size +=
98 options.file_header_template.len() + file.relative_path.to_string_lossy().len() + 20;
99
100 if let Ok(content) = cache.get_or_load(&file.path) {
102 size += content.len() + 20; } else {
104 size += file.size as usize; }
106 }
107
108 size + (size / 5)
110}
111
112pub fn generate_markdown(
114 files: Vec<FileInfo>,
115 options: DigestOptions,
116 cache: Arc<FileCache>,
117) -> Result<String> {
118 let estimated_size = estimate_output_size(&files, &options, &cache);
120 let mut output = String::with_capacity(estimated_size);
121
122 if !options.doc_header_template.is_empty() {
124 let header = options.doc_header_template.replace("{directory}", ".");
125 output.push_str(&header);
126 output.push_str("\n\n");
127 }
128
129 if options.include_stats {
131 let stats = generate_statistics(&files);
132 output.push_str(&stats);
133 output.push_str("\n\n");
134 }
135
136 if options.include_tree {
138 let tree = generate_file_tree(&files, &options);
139 output.push_str("## File Structure\n\n");
140 output.push_str("```\n");
141 output.push_str(&tree);
142 output.push_str("```\n\n");
143 }
144
145 let mut files = files;
147 if options.sort_by_priority {
148 files.sort_by(|a, b| {
149 b.priority
150 .partial_cmp(&a.priority)
151 .unwrap_or(std::cmp::Ordering::Equal)
152 .then_with(|| a.relative_path.cmp(&b.relative_path))
153 });
154 }
155
156 if options.include_toc {
158 output.push_str("## Table of Contents\n\n");
159 for file in &files {
160 let anchor = path_to_anchor(&file.relative_path);
161 output.push_str(&format!(
162 "- [{path}](#{anchor})\n",
163 path = file.relative_path.display(),
164 anchor = anchor
165 ));
166 }
167 output.push('\n');
168 }
169
170 if options.group_by_type {
172 let grouped = group_files_by_type(files);
173 for (file_type, group_files) in grouped {
174 output.push_str(&format!("## {} Files\n\n", file_type_display(&file_type)));
175 for file in group_files {
176 append_file_content(&mut output, &file, &options, &cache)?;
177 }
178 }
179 } else {
180 for file in files {
182 append_file_content(&mut output, &file, &options, &cache)?;
183 }
184 }
185
186 Ok(output)
187}
188
189fn append_file_content(
191 output: &mut String,
192 file: &FileInfo,
193 options: &DigestOptions,
194 cache: &FileCache,
195) -> Result<()> {
196 let content = match cache.get_or_load(&file.path) {
198 Ok(content) => content,
199 Err(e) => {
200 eprintln!("Warning: Could not read file {}: {}", file.path.display(), e);
201 return Ok(());
202 }
203 };
204
205 let path_with_metadata = if options.enhanced_context {
207 format!(
208 "{} ({}, {})",
209 file.relative_path.display(),
210 format_size(file.size),
211 file_type_display(&file.file_type)
212 )
213 } else {
214 file.relative_path.display().to_string()
215 };
216
217 let header = options.file_header_template.replace("{path}", &path_with_metadata);
218 output.push_str(&header);
219 output.push_str("\n\n");
220
221 let language = get_language_hint(&file.file_type);
223 output.push_str(&format!("```{language}\n"));
224 output.push_str(&content);
225 if !content.ends_with('\n') {
226 output.push('\n');
227 }
228 output.push_str("```\n\n");
229
230 Ok(())
231}
232
233fn generate_statistics(files: &[FileInfo]) -> String {
235 let total_files = files.len();
236 let total_size: u64 = files.iter().map(|f| f.size).sum();
237
238 let mut type_counts: HashMap<FileType, usize> = HashMap::new();
240 for file in files {
241 *type_counts.entry(file.file_type.clone()).or_insert(0) += 1;
242 }
243
244 let mut stats = String::with_capacity(500 + type_counts.len() * 50);
246 stats.push_str("## Statistics\n\n");
247 stats.push_str(&format!("- Total files: {total_files}\n"));
248 stats.push_str(&format!("- Total size: {} bytes\n", format_size(total_size)));
249 stats.push_str("\n### Files by type:\n");
250
251 let mut types: Vec<_> = type_counts.into_iter().collect();
252 types.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
253
254 for (file_type, count) in types {
255 stats.push_str(&format!("- {}: {}\n", file_type_display(&file_type), count));
256 }
257
258 stats
259}
260
261fn generate_file_tree(files: &[FileInfo], options: &DigestOptions) -> String {
263 use std::collections::{BTreeMap, HashMap};
264
265 #[derive(Default)]
266 struct TreeNode {
267 files: Vec<String>,
268 dirs: BTreeMap<String, TreeNode>,
269 }
270
271 let mut root = TreeNode::default();
272
273 let file_lookup: HashMap<String, &FileInfo> =
275 files.iter().map(|f| (f.relative_path.to_string_lossy().to_string(), f)).collect();
276
277 for file in files {
279 let parts: Vec<_> = file
280 .relative_path
281 .components()
282 .map(|c| c.as_os_str().to_string_lossy().to_string())
283 .collect();
284
285 let mut current = &mut root;
286 for (i, part) in parts.iter().enumerate() {
287 if i == parts.len() - 1 {
288 current.files.push(part.clone());
290 } else {
291 current = current.dirs.entry(part.clone()).or_default();
293 }
294 }
295 }
296
297 fn render_tree(
299 node: &TreeNode,
300 prefix: &str,
301 _is_last: bool,
302 current_path: &str,
303 file_lookup: &HashMap<String, &FileInfo>,
304 options: &DigestOptions,
305 ) -> String {
306 let estimated_size = (node.dirs.len() + node.files.len()) * 100;
308 let mut output = String::with_capacity(estimated_size);
309
310 let dir_count = node.dirs.len();
312 for (i, (name, child)) in node.dirs.iter().enumerate() {
313 let is_last_dir = i == dir_count - 1 && node.files.is_empty();
314 let connector = if is_last_dir { "└── " } else { "├── " };
315 let extension = if is_last_dir { " " } else { "│ " };
316
317 output.push_str(&format!("{prefix}{connector}{name}/\n"));
318 let child_path = if current_path.is_empty() {
319 name.clone()
320 } else {
321 format!("{current_path}/{name}")
322 };
323 output.push_str(&render_tree(
324 child,
325 &format!("{prefix}{extension}"),
326 is_last_dir,
327 &child_path,
328 file_lookup,
329 options,
330 ));
331 }
332
333 let file_count = node.files.len();
335 for (i, name) in node.files.iter().enumerate() {
336 let is_last_file = i == file_count - 1;
337 let connector = if is_last_file { "└── " } else { "├── " };
338
339 let file_path = if current_path.is_empty() {
340 name.clone()
341 } else {
342 format!("{current_path}/{name}")
343 };
344
345 let display_name = if options.enhanced_context {
347 if let Some(file_info) = file_lookup.get(&file_path) {
348 format!(
349 "{} ({}, {})",
350 name,
351 format_size(file_info.size),
352 file_type_display(&file_info.file_type)
353 )
354 } else {
355 name.clone()
356 }
357 } else {
358 name.clone()
359 };
360
361 output.push_str(&format!("{prefix}{connector}{display_name}\n"));
362 }
363
364 output
365 }
366
367 let mut output = String::with_capacity(files.len() * 100 + 10);
369 output.push_str(".\n");
370 output.push_str(&render_tree(&root, "", true, "", &file_lookup, options));
371 output
372}
373
374fn group_files_by_type(files: Vec<FileInfo>) -> Vec<(FileType, Vec<FileInfo>)> {
376 let mut groups: HashMap<FileType, Vec<FileInfo>> = HashMap::new();
377
378 for file in files {
379 groups.entry(file.file_type.clone()).or_default().push(file);
380 }
381
382 let mut result: Vec<_> = groups.into_iter().collect();
383 result.sort_by_key(|(file_type, _)| file_type_priority(file_type));
384 result
385}
386
387fn file_type_display(file_type: &FileType) -> &'static str {
389 match file_type {
390 FileType::Rust => "Rust",
391 FileType::Python => "Python",
392 FileType::JavaScript => "JavaScript",
393 FileType::TypeScript => "TypeScript",
394 FileType::Go => "Go",
395 FileType::Java => "Java",
396 FileType::Cpp => "C++",
397 FileType::C => "C",
398 FileType::CSharp => "C#",
399 FileType::Ruby => "Ruby",
400 FileType::Php => "PHP",
401 FileType::Swift => "Swift",
402 FileType::Kotlin => "Kotlin",
403 FileType::Scala => "Scala",
404 FileType::Haskell => "Haskell",
405 FileType::Markdown => "Markdown",
406 FileType::Json => "JSON",
407 FileType::Yaml => "YAML",
408 FileType::Toml => "TOML",
409 FileType::Xml => "XML",
410 FileType::Html => "HTML",
411 FileType::Css => "CSS",
412 FileType::Text => "Text",
413 FileType::Other => "Other",
414 }
415}
416
417fn get_language_hint(file_type: &FileType) -> &'static str {
419 match file_type {
420 FileType::Rust => "rust",
421 FileType::Python => "python",
422 FileType::JavaScript => "javascript",
423 FileType::TypeScript => "typescript",
424 FileType::Go => "go",
425 FileType::Java => "java",
426 FileType::Cpp => "cpp",
427 FileType::C => "c",
428 FileType::CSharp => "csharp",
429 FileType::Ruby => "ruby",
430 FileType::Php => "php",
431 FileType::Swift => "swift",
432 FileType::Kotlin => "kotlin",
433 FileType::Scala => "scala",
434 FileType::Haskell => "haskell",
435 FileType::Markdown => "markdown",
436 FileType::Json => "json",
437 FileType::Yaml => "yaml",
438 FileType::Toml => "toml",
439 FileType::Xml => "xml",
440 FileType::Html => "html",
441 FileType::Css => "css",
442 FileType::Text => "text",
443 FileType::Other => "",
444 }
445}
446
447fn file_type_priority(file_type: &FileType) -> u8 {
449 match file_type {
450 FileType::Rust => 1,
451 FileType::Python => 2,
452 FileType::JavaScript => 3,
453 FileType::TypeScript => 3,
454 FileType::Go => 4,
455 FileType::Java => 5,
456 FileType::Cpp => 6,
457 FileType::C => 7,
458 FileType::CSharp => 8,
459 FileType::Ruby => 9,
460 FileType::Php => 10,
461 FileType::Swift => 11,
462 FileType::Kotlin => 12,
463 FileType::Scala => 13,
464 FileType::Haskell => 14,
465 FileType::Markdown => 15,
466 FileType::Json => 16,
467 FileType::Yaml => 17,
468 FileType::Toml => 18,
469 FileType::Xml => 19,
470 FileType::Html => 20,
471 FileType::Css => 21,
472 FileType::Text => 22,
473 FileType::Other => 23,
474 }
475}
476
/// Converts `path` into the anchor text used by table-of-contents links:
/// `/`, `\`, `.` and spaces each become `-`, and the result is lower-cased.
fn path_to_anchor(path: &Path) -> String {
    let dashed: String = path
        .display()
        .to_string()
        .chars()
        .map(|ch| match ch {
            '/' | '\\' | '.' | ' ' => '-',
            other => other,
        })
        .collect();
    // Lower-case the whole string at once rather than per character.
    dashed.to_lowercase()
}
481
/// Formats a byte count for display using 1024-based units.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values are
/// printed with two decimals in the largest fitting unit (`"1.50 KB"`). Units
/// go up to EB, which covers the whole `u64` range.
///
/// A value that would round up to `1024.00` in its unit is shown in the next
/// unit instead, so `1_048_575` renders as `"1.00 MB"` rather than
/// `"1024.00 KB"`.
fn format_size(size: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB", "PB", "EB"];
    let last_unit = UNITS.len() - 1;

    if size < 1024 {
        return format!("{size} {}", UNITS[0]);
    }

    let mut value = size as f64;
    let mut unit_index = 0;
    while value >= 1024.0 && unit_index < last_unit {
        value /= 1024.0;
        unit_index += 1;
    }

    // Two-decimal rounding can push a value just under the boundary up to
    // 1024.00; promote it to the next unit when one exists.
    if unit_index < last_unit && (value * 100.0).round() >= 102_400.0 {
        value /= 1024.0;
        unit_index += 1;
    }

    format!("{value:.2} {}", UNITS[unit_index])
}
499
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Fresh, empty cache shared by the rendering tests.
    fn create_test_cache() -> Arc<FileCache> {
        Arc::new(FileCache::new())
    }

    /// The `src/main.rs` fixture (145 bytes, Rust) used by the metadata tests.
    fn main_rs_file() -> FileInfo {
        FileInfo {
            path: PathBuf::from("src/main.rs"),
            relative_path: PathBuf::from("src/main.rs"),
            size: 145,
            file_type: FileType::Rust,
            priority: 1.5,
        }
    }

    /// Options with every section enabled and a placeholder-free document
    /// header; only grouping and enhanced context vary between tests.
    fn all_sections_options(group_by_type: bool, enhanced_context: bool) -> DigestOptions {
        DigestOptions {
            max_tokens: None,
            include_tree: true,
            include_stats: true,
            group_by_type,
            sort_by_priority: true,
            file_header_template: "## {path}".to_string(),
            doc_header_template: "# Code Digest".to_string(),
            include_toc: true,
            enhanced_context,
        }
    }

    #[test]
    fn test_format_size() {
        assert_eq!(format_size(512), "512 B");
        assert_eq!(format_size(1024), "1.00 KB");
        assert_eq!(format_size(1536), "1.50 KB");
        assert_eq!(format_size(1048576), "1.00 MB");
    }

    #[test]
    fn test_path_to_anchor() {
        assert_eq!(path_to_anchor(Path::new("src/main.rs")), "src-main-rs");
        assert_eq!(path_to_anchor(Path::new("test file.txt")), "test-file-txt");
    }

    #[test]
    fn test_file_type_display() {
        assert_eq!(file_type_display(&FileType::Rust), "Rust");
        assert_eq!(file_type_display(&FileType::Python), "Python");
    }

    #[test]
    fn test_generate_statistics() {
        let rust_file = FileInfo {
            path: PathBuf::from("test1.rs"),
            relative_path: PathBuf::from("test1.rs"),
            size: 100,
            file_type: FileType::Rust,
            priority: 1.0,
        };
        let python_file = FileInfo {
            path: PathBuf::from("test2.py"),
            relative_path: PathBuf::from("test2.py"),
            size: 200,
            file_type: FileType::Python,
            priority: 0.9,
        };

        let stats = generate_statistics(&[rust_file, python_file]);
        assert!(stats.contains("Total files: 2"));
        assert!(stats.contains("Total size: 300 B"));
        assert!(stats.contains("Rust: 1"));
        assert!(stats.contains("Python: 1"));
    }

    #[test]
    fn test_generate_statistics_empty() {
        let stats = generate_statistics(&[]);
        assert!(stats.contains("Total files: 0"));
        assert!(stats.contains("Total size: 0 B"));
    }

    #[test]
    fn test_generate_statistics_large_files() {
        // 2 MB of Rust plus 50 MB of Python.
        let large_rust = FileInfo {
            path: PathBuf::from("large.rs"),
            relative_path: PathBuf::from("large.rs"),
            size: 2_000_000,
            file_type: FileType::Rust,
            priority: 1.0,
        };
        let huge_python = FileInfo {
            path: PathBuf::from("huge.py"),
            relative_path: PathBuf::from("huge.py"),
            size: 50_000_000,
            file_type: FileType::Python,
            priority: 0.9,
        };

        let stats = generate_statistics(&[large_rust, huge_python]);
        assert!(stats.contains("Total files: 2"));
        // The formatted size is followed by the literal " bytes" suffix.
        assert!(stats.contains("MB bytes"));
        assert!(stats.contains("Python: 1"));
        assert!(stats.contains("Rust: 1"));
    }

    #[test]
    fn test_generate_file_tree_with_grouping() {
        let files = vec![
            FileInfo {
                path: PathBuf::from("src/main.rs"),
                relative_path: PathBuf::from("src/main.rs"),
                size: 1000,
                file_type: FileType::Rust,
                priority: 1.5,
            },
            FileInfo {
                path: PathBuf::from("src/lib.rs"),
                relative_path: PathBuf::from("src/lib.rs"),
                size: 2000,
                file_type: FileType::Rust,
                priority: 1.2,
            },
            FileInfo {
                path: PathBuf::from("tests/test.rs"),
                relative_path: PathBuf::from("tests/test.rs"),
                size: 500,
                file_type: FileType::Rust,
                priority: 0.8,
            },
        ];

        let tree = generate_file_tree(&files, &DigestOptions::default());
        for expected in ["src/", "tests/", "main.rs", "lib.rs", "test.rs"] {
            assert!(tree.contains(expected));
        }
    }

    #[test]
    fn test_digest_options_from_config() {
        use crate::cli::Config;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let config = Config {
            prompt: None,
            paths: Some(vec![temp_dir.path().to_path_buf()]),
            output_file: None,
            max_tokens: Some(100000),
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            repo: None,
            read_stdin: false,
            copy: false,
            enhanced_context: false,
            custom_priorities: vec![],
        };

        let options = DigestOptions::from_config(&config).unwrap();
        assert_eq!(options.max_tokens, Some(100000));
        assert!(options.include_tree);
        assert!(options.include_stats);
        assert!(!options.group_by_type);
    }

    #[test]
    fn test_generate_markdown_structure_headers() {
        let options = all_sections_options(true, false);
        let markdown = generate_markdown(Vec::new(), options, create_test_cache()).unwrap();

        assert!(markdown.contains("# Code Digest"));
        assert!(markdown.contains("## Statistics"));
    }

    #[test]
    fn test_enhanced_tree_generation_with_metadata() {
        let lib_rs = FileInfo {
            path: PathBuf::from("src/lib.rs"),
            relative_path: PathBuf::from("src/lib.rs"),
            size: 89,
            file_type: FileType::Rust,
            priority: 1.2,
        };
        let files = vec![main_rs_file(), lib_rs];

        let options = all_sections_options(false, true);
        let markdown = generate_markdown(files, options, create_test_cache()).unwrap();

        assert!(markdown.contains("main.rs (145 B, Rust)"));
        assert!(markdown.contains("lib.rs (89 B, Rust)"));
    }

    #[test]
    fn test_enhanced_file_headers_with_metadata() {
        let options = all_sections_options(false, true);
        let markdown =
            generate_markdown(vec![main_rs_file()], options, create_test_cache()).unwrap();

        assert!(markdown.contains("## src/main.rs (145 B, Rust)"));
    }

    #[test]
    fn test_basic_mode_unchanged() {
        let options = all_sections_options(false, false);
        let markdown =
            generate_markdown(vec![main_rs_file()], options, create_test_cache()).unwrap();

        assert!(markdown.contains("## src/main.rs"));
        assert!(!markdown.contains("## src/main.rs (145 B, Rust)"));
        assert!(markdown.contains("main.rs") && !markdown.contains("main.rs (145 B, Rust)"));
    }
}