code_digest/core/
digest.rs

1//! Markdown generation functionality
2
3use crate::core::cache::FileCache;
4use crate::core::walker::FileInfo;
5use crate::utils::file_ext::FileType;
6use anyhow::Result;
7use std::collections::HashMap;
8use std::path::Path;
9use std::sync::Arc;
10
/// Options controlling how the markdown digest is assembled.
///
/// The boolean flags each toggle one optional section or behaviour of
/// `generate_markdown`; the two templates control heading text.
#[derive(Debug, Clone)]
pub struct DigestOptions {
    /// Maximum tokens allowed in the output.
    ///
    /// Copied from the CLI config by `from_config`; the rendering functions
    /// visible in this file do not read it.
    pub max_tokens: Option<usize>,
    /// Include file tree in output (the `## File Structure` section).
    pub include_tree: bool,
    /// Include token count statistics (the `## Statistics` section).
    pub include_stats: bool,
    /// Group files by type, emitting a `## <Type> Files` heading per group.
    pub group_by_type: bool,
    /// Sort files by priority (descending), breaking ties by relative path.
    pub sort_by_priority: bool,
    /// Template for file headers; `{path}` is replaced with the file's
    /// relative path, plus size and type when `enhanced_context` is set.
    pub file_header_template: String,
    /// Template for the document header; `{directory}` is replaced with `.`.
    /// An empty template suppresses the document header entirely.
    pub doc_header_template: String,
    /// Include table of contents (the `## Table of Contents` section).
    pub include_toc: bool,
    /// Enable enhanced context with file metadata: size and file type are
    /// appended to tree entries and file headers.
    pub enhanced_context: bool,
}
33
34impl DigestOptions {
35    /// Create DigestOptions from CLI config
36    pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
37        Ok(DigestOptions {
38            max_tokens: config.max_tokens,
39            include_tree: true,
40            include_stats: true,
41            group_by_type: false,
42            sort_by_priority: true,
43            file_header_template: "## {path}".to_string(),
44            doc_header_template: "# Code Digest: {directory}".to_string(),
45            include_toc: true,
46            enhanced_context: config.enhanced_context,
47        })
48    }
49}
50
51impl Default for DigestOptions {
52    fn default() -> Self {
53        DigestOptions {
54            max_tokens: None,
55            include_tree: true,
56            include_stats: true,
57            group_by_type: false,
58            sort_by_priority: true,
59            file_header_template: "## {path}".to_string(),
60            doc_header_template: "# Code Digest: {directory}".to_string(),
61            include_toc: true,
62            enhanced_context: false,
63        }
64    }
65}
66
67/// Estimate the total size of the markdown output
68fn estimate_output_size(files: &[FileInfo], options: &DigestOptions, cache: &FileCache) -> usize {
69    let mut size = 0;
70
71    // Document header
72    if !options.doc_header_template.is_empty() {
73        size += options.doc_header_template.len() + 50; // Extra for replacements and newlines
74    }
75
76    // Statistics section
77    if options.include_stats {
78        size += 500; // Estimated size for stats
79        size += files.len() * 50; // For file type listing
80    }
81
82    // File tree
83    if options.include_tree {
84        size += 100; // Headers
85        size += files.len() * 100; // Estimated per-file in tree
86    }
87
88    // Table of contents
89    if options.include_toc {
90        size += 50; // Header
91        size += files.len() * 100; // Per-file TOC entry
92    }
93
94    // File contents
95    for file in files {
96        // Header template
97        size +=
98            options.file_header_template.len() + file.relative_path.to_string_lossy().len() + 20;
99
100        // File content + code fence
101        if let Ok(content) = cache.get_or_load(&file.path) {
102            size += content.len() + 20; // Content + fence markers
103        } else {
104            size += file.size as usize; // Fallback to file size
105        }
106    }
107
108    // Add 20% buffer for formatting and unexpected overhead
109    size + (size / 5)
110}
111
112/// Generate markdown from a list of files
113pub fn generate_markdown(
114    files: Vec<FileInfo>,
115    options: DigestOptions,
116    cache: Arc<FileCache>,
117) -> Result<String> {
118    // Pre-allocate string with estimated capacity
119    let estimated_size = estimate_output_size(&files, &options, &cache);
120    let mut output = String::with_capacity(estimated_size);
121
122    // Add document header
123    if !options.doc_header_template.is_empty() {
124        let header = options.doc_header_template.replace("{directory}", ".");
125        output.push_str(&header);
126        output.push_str("\n\n");
127    }
128
129    // Add statistics if requested
130    if options.include_stats {
131        let stats = generate_statistics(&files);
132        output.push_str(&stats);
133        output.push_str("\n\n");
134    }
135
136    // Add file tree if requested
137    if options.include_tree {
138        let tree = generate_file_tree(&files, &options);
139        output.push_str("## File Structure\n\n");
140        output.push_str("```\n");
141        output.push_str(&tree);
142        output.push_str("```\n\n");
143    }
144
145    // Sort files if requested
146    let mut files = files;
147    if options.sort_by_priority {
148        files.sort_by(|a, b| {
149            b.priority
150                .partial_cmp(&a.priority)
151                .unwrap_or(std::cmp::Ordering::Equal)
152                .then_with(|| a.relative_path.cmp(&b.relative_path))
153        });
154    }
155
156    // Add table of contents if requested
157    if options.include_toc {
158        output.push_str("## Table of Contents\n\n");
159        for file in &files {
160            let anchor = path_to_anchor(&file.relative_path);
161            output.push_str(&format!(
162                "- [{path}](#{anchor})\n",
163                path = file.relative_path.display(),
164                anchor = anchor
165            ));
166        }
167        output.push('\n');
168    }
169
170    // Group files if requested
171    if options.group_by_type {
172        let grouped = group_files_by_type(files);
173        for (file_type, group_files) in grouped {
174            output.push_str(&format!("## {} Files\n\n", file_type_display(&file_type)));
175            for file in group_files {
176                append_file_content(&mut output, &file, &options, &cache)?;
177            }
178        }
179    } else {
180        // Add all files
181        for file in files {
182            append_file_content(&mut output, &file, &options, &cache)?;
183        }
184    }
185
186    Ok(output)
187}
188
189/// Append a single file's content to the output
190fn append_file_content(
191    output: &mut String,
192    file: &FileInfo,
193    options: &DigestOptions,
194    cache: &FileCache,
195) -> Result<()> {
196    // Read file content from cache
197    let content = match cache.get_or_load(&file.path) {
198        Ok(content) => content,
199        Err(e) => {
200            eprintln!("Warning: Could not read file {}: {}", file.path.display(), e);
201            return Ok(());
202        }
203    };
204
205    // Add file header with optional metadata
206    let path_with_metadata = if options.enhanced_context {
207        format!(
208            "{} ({}, {})",
209            file.relative_path.display(),
210            format_size(file.size),
211            file_type_display(&file.file_type)
212        )
213    } else {
214        file.relative_path.display().to_string()
215    };
216
217    let header = options.file_header_template.replace("{path}", &path_with_metadata);
218    output.push_str(&header);
219    output.push_str("\n\n");
220
221    // Add language hint for syntax highlighting
222    let language = get_language_hint(&file.file_type);
223    output.push_str(&format!("```{language}\n"));
224    output.push_str(&content);
225    if !content.ends_with('\n') {
226        output.push('\n');
227    }
228    output.push_str("```\n\n");
229
230    Ok(())
231}
232
233/// Generate statistics about the files
234fn generate_statistics(files: &[FileInfo]) -> String {
235    let total_files = files.len();
236    let total_size: u64 = files.iter().map(|f| f.size).sum();
237
238    // Count by file type
239    let mut type_counts: HashMap<FileType, usize> = HashMap::new();
240    for file in files {
241        *type_counts.entry(file.file_type.clone()).or_insert(0) += 1;
242    }
243
244    // Pre-allocate with estimated capacity
245    let mut stats = String::with_capacity(500 + type_counts.len() * 50);
246    stats.push_str("## Statistics\n\n");
247    stats.push_str(&format!("- Total files: {total_files}\n"));
248    stats.push_str(&format!("- Total size: {} bytes\n", format_size(total_size)));
249    stats.push_str("\n### Files by type:\n");
250
251    let mut types: Vec<_> = type_counts.into_iter().collect();
252    types.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
253
254    for (file_type, count) in types {
255        stats.push_str(&format!("- {}: {}\n", file_type_display(&file_type), count));
256    }
257
258    stats
259}
260
261/// Generate a file tree representation
262fn generate_file_tree(files: &[FileInfo], options: &DigestOptions) -> String {
263    use std::collections::{BTreeMap, HashMap};
264
265    #[derive(Default)]
266    struct TreeNode {
267        files: Vec<String>,
268        dirs: BTreeMap<String, TreeNode>,
269    }
270
271    let mut root = TreeNode::default();
272
273    // Create a lookup map from relative path to FileInfo for metadata
274    let file_lookup: HashMap<String, &FileInfo> =
275        files.iter().map(|f| (f.relative_path.to_string_lossy().to_string(), f)).collect();
276
277    // Build tree structure
278    for file in files {
279        let parts: Vec<_> = file
280            .relative_path
281            .components()
282            .map(|c| c.as_os_str().to_string_lossy().to_string())
283            .collect();
284
285        let mut current = &mut root;
286        for (i, part) in parts.iter().enumerate() {
287            if i == parts.len() - 1 {
288                // File
289                current.files.push(part.clone());
290            } else {
291                // Directory
292                current = current.dirs.entry(part.clone()).or_default();
293            }
294        }
295    }
296
297    // Render tree
298    fn render_tree(
299        node: &TreeNode,
300        prefix: &str,
301        _is_last: bool,
302        current_path: &str,
303        file_lookup: &HashMap<String, &FileInfo>,
304        options: &DigestOptions,
305    ) -> String {
306        // Pre-allocate with estimated size
307        let estimated_size = (node.dirs.len() + node.files.len()) * 100;
308        let mut output = String::with_capacity(estimated_size);
309
310        // Render directories
311        let dir_count = node.dirs.len();
312        for (i, (name, child)) in node.dirs.iter().enumerate() {
313            let is_last_dir = i == dir_count - 1 && node.files.is_empty();
314            let connector = if is_last_dir { "└── " } else { "├── " };
315            let extension = if is_last_dir { "    " } else { "│   " };
316
317            output.push_str(&format!("{prefix}{connector}{name}/\n"));
318            let child_path = if current_path.is_empty() {
319                name.clone()
320            } else {
321                format!("{current_path}/{name}")
322            };
323            output.push_str(&render_tree(
324                child,
325                &format!("{prefix}{extension}"),
326                is_last_dir,
327                &child_path,
328                file_lookup,
329                options,
330            ));
331        }
332
333        // Render files
334        let file_count = node.files.len();
335        for (i, name) in node.files.iter().enumerate() {
336            let is_last_file = i == file_count - 1;
337            let connector = if is_last_file { "└── " } else { "├── " };
338
339            let file_path = if current_path.is_empty() {
340                name.clone()
341            } else {
342                format!("{current_path}/{name}")
343            };
344
345            // Include metadata if enhanced context is enabled
346            let display_name = if options.enhanced_context {
347                if let Some(file_info) = file_lookup.get(&file_path) {
348                    format!(
349                        "{} ({}, {})",
350                        name,
351                        format_size(file_info.size),
352                        file_type_display(&file_info.file_type)
353                    )
354                } else {
355                    name.clone()
356                }
357            } else {
358                name.clone()
359            };
360
361            output.push_str(&format!("{prefix}{connector}{display_name}\n"));
362        }
363
364        output
365    }
366
367    // Pre-allocate output string
368    let mut output = String::with_capacity(files.len() * 100 + 10);
369    output.push_str(".\n");
370    output.push_str(&render_tree(&root, "", true, "", &file_lookup, options));
371    output
372}
373
374/// Group files by their type
375fn group_files_by_type(files: Vec<FileInfo>) -> Vec<(FileType, Vec<FileInfo>)> {
376    let mut groups: HashMap<FileType, Vec<FileInfo>> = HashMap::new();
377
378    for file in files {
379        groups.entry(file.file_type.clone()).or_default().push(file);
380    }
381
382    let mut result: Vec<_> = groups.into_iter().collect();
383    result.sort_by_key(|(file_type, _)| file_type_priority(file_type));
384    result
385}
386
/// Get display name for file type.
///
/// Returns the human-readable label used in the statistics listing, the
/// `## <Type> Files` group headings, and the enhanced-context metadata.
/// The match is exhaustive, so adding a `FileType` variant forces an update
/// here.
fn file_type_display(file_type: &FileType) -> &'static str {
    match file_type {
        FileType::Rust => "Rust",
        FileType::Python => "Python",
        FileType::JavaScript => "JavaScript",
        FileType::TypeScript => "TypeScript",
        FileType::Go => "Go",
        FileType::Java => "Java",
        FileType::Cpp => "C++",
        FileType::C => "C",
        FileType::CSharp => "C#",
        FileType::Ruby => "Ruby",
        FileType::Php => "PHP",
        FileType::Swift => "Swift",
        FileType::Kotlin => "Kotlin",
        FileType::Scala => "Scala",
        FileType::Haskell => "Haskell",
        FileType::Markdown => "Markdown",
        FileType::Json => "JSON",
        FileType::Yaml => "YAML",
        FileType::Toml => "TOML",
        FileType::Xml => "XML",
        FileType::Html => "HTML",
        FileType::Css => "CSS",
        FileType::Text => "Text",
        FileType::Other => "Other",
    }
}
416
/// Get language hint for syntax highlighting.
///
/// Returns the tag written immediately after the opening code fence of a
/// file's content. `FileType::Other` maps to the empty string, which leaves
/// the fence without a language tag.
fn get_language_hint(file_type: &FileType) -> &'static str {
    match file_type {
        FileType::Rust => "rust",
        FileType::Python => "python",
        FileType::JavaScript => "javascript",
        FileType::TypeScript => "typescript",
        FileType::Go => "go",
        FileType::Java => "java",
        FileType::Cpp => "cpp",
        FileType::C => "c",
        FileType::CSharp => "csharp",
        FileType::Ruby => "ruby",
        FileType::Php => "php",
        FileType::Swift => "swift",
        FileType::Kotlin => "kotlin",
        FileType::Scala => "scala",
        FileType::Haskell => "haskell",
        FileType::Markdown => "markdown",
        FileType::Json => "json",
        FileType::Yaml => "yaml",
        FileType::Toml => "toml",
        FileType::Xml => "xml",
        FileType::Html => "html",
        FileType::Css => "css",
        FileType::Text => "text",
        // No hint: the fence is emitted without a language tag.
        FileType::Other => "",
    }
}
446
/// Get priority for file type ordering.
///
/// Returns the rank used to order the per-type groups; lower values come
/// first. Ranks are not unique: JavaScript and TypeScript both map to 3.
fn file_type_priority(file_type: &FileType) -> u8 {
    match file_type {
        FileType::Rust => 1,
        FileType::Python => 2,
        // JavaScript and TypeScript deliberately or accidentally share a rank.
        FileType::JavaScript => 3,
        FileType::TypeScript => 3,
        FileType::Go => 4,
        FileType::Java => 5,
        FileType::Cpp => 6,
        FileType::C => 7,
        FileType::CSharp => 8,
        FileType::Ruby => 9,
        FileType::Php => 10,
        FileType::Swift => 11,
        FileType::Kotlin => 12,
        FileType::Scala => 13,
        FileType::Haskell => 14,
        FileType::Markdown => 15,
        FileType::Json => 16,
        FileType::Yaml => 17,
        FileType::Toml => 18,
        FileType::Xml => 19,
        FileType::Html => 20,
        FileType::Css => 21,
        FileType::Text => 22,
        FileType::Other => 23,
    }
}
476
/// Turns a path into the anchor slug used by the table of contents.
///
/// Every `/`, `\`, `.` and space in the displayed path becomes `-`, and the
/// result is lowercased: `src/main.rs` yields `src-main-rs`.
fn path_to_anchor(path: &Path) -> String {
    let slug: String = path
        .display()
        .to_string()
        .chars()
        .map(|ch| if matches!(ch, '/' | '\\' | '.' | ' ') { '-' } else { ch })
        .collect();
    slug.to_lowercase()
}
481
/// Formats a byte count for display using binary (1024-based) units.
///
/// Values below 1024 are printed as whole bytes (`512 B`). Larger values are
/// scaled to KB, MB or GB and printed with two decimals (`1.50 KB`). GB is
/// the largest unit, so anything beyond it is still expressed in GB.
fn format_size(size: u64) -> String {
    const UNITS: [&str; 4] = ["B", "KB", "MB", "GB"];

    // Plain byte counts are shown as integers, without decimals.
    if size < 1024 {
        return format!("{size} {}", UNITS[0]);
    }

    let mut scaled = size as f64;
    let mut unit = 0;
    // Step up one unit at a time, stopping at the largest unit available.
    while scaled >= 1024.0 && unit + 1 < UNITS.len() {
        scaled /= 1024.0;
        unit += 1;
    }

    format!("{scaled:.2} {}", UNITS[unit])
}
499
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds a `FileInfo` whose `path` and `relative_path` are both `$path`.
    macro_rules! file_info {
        ($path:expr, $size:expr, $file_type:expr, $priority:expr) => {
            FileInfo {
                path: PathBuf::from($path),
                relative_path: PathBuf::from($path),
                size: $size,
                file_type: $file_type,
                priority: $priority,
            }
        };
    }

    /// Builds an empty shared cache for the markdown tests.
    fn create_test_cache() -> Arc<FileCache> {
        Arc::new(FileCache::new())
    }

    /// Options with every section enabled and the short document header used
    /// by the markdown tests; only grouping and enhanced context vary.
    fn test_options(group_by_type: bool, enhanced_context: bool) -> DigestOptions {
        DigestOptions {
            max_tokens: None,
            include_tree: true,
            include_stats: true,
            group_by_type,
            sort_by_priority: true,
            file_header_template: "## {path}".to_string(),
            doc_header_template: "# Code Digest".to_string(),
            include_toc: true,
            enhanced_context,
        }
    }

    /// Renders `files` to markdown with a fresh cache.
    fn render(files: Vec<FileInfo>, options: DigestOptions) -> String {
        generate_markdown(files, options, create_test_cache()).unwrap()
    }

    #[test]
    fn test_format_size() {
        let cases = [
            (512, "512 B"),
            (1024, "1.00 KB"),
            (1536, "1.50 KB"),
            (1048576, "1.00 MB"),
        ];
        for (bytes, expected) in cases {
            assert_eq!(format_size(bytes), expected);
        }
    }

    #[test]
    fn test_path_to_anchor() {
        assert_eq!(path_to_anchor(Path::new("src/main.rs")), "src-main-rs");
        assert_eq!(path_to_anchor(Path::new("test file.txt")), "test-file-txt");
    }

    #[test]
    fn test_file_type_display() {
        assert_eq!(file_type_display(&FileType::Rust), "Rust");
        assert_eq!(file_type_display(&FileType::Python), "Python");
    }

    #[test]
    fn test_generate_statistics() {
        let files = vec![
            file_info!("test1.rs", 100, FileType::Rust, 1.0),
            file_info!("test2.py", 200, FileType::Python, 0.9),
        ];

        let stats = generate_statistics(&files);
        for expected in ["Total files: 2", "Total size: 300 B", "Rust: 1", "Python: 1"] {
            assert!(stats.contains(expected));
        }
    }

    #[test]
    fn test_generate_statistics_empty() {
        let stats = generate_statistics(&[]);
        assert!(stats.contains("Total files: 0"));
        assert!(stats.contains("Total size: 0 B"));
    }

    #[test]
    fn test_generate_statistics_large_files() {
        let files = vec![
            file_info!("large.rs", 2_000_000, FileType::Rust, 1.0), // 2MB
            file_info!("huge.py", 50_000_000, FileType::Python, 0.9), // 50MB
        ];

        let stats = generate_statistics(&files);
        assert!(stats.contains("Total files: 2"));
        assert!(stats.contains("MB bytes")); // Just check that it's in MB
        assert!(stats.contains("Python: 1"));
        assert!(stats.contains("Rust: 1"));
    }

    #[test]
    fn test_generate_file_tree_with_grouping() {
        let files = vec![
            file_info!("src/main.rs", 1000, FileType::Rust, 1.5),
            file_info!("src/lib.rs", 2000, FileType::Rust, 1.2),
            file_info!("tests/test.rs", 500, FileType::Rust, 0.8),
        ];

        let tree = generate_file_tree(&files, &DigestOptions::default());
        for expected in ["src/", "tests/", "main.rs", "lib.rs", "test.rs"] {
            assert!(tree.contains(expected));
        }
    }

    #[test]
    fn test_digest_options_from_config() {
        use crate::cli::Config;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let config = Config {
            prompt: None,
            paths: Some(vec![temp_dir.path().to_path_buf()]),
            output_file: None,
            max_tokens: Some(100000),
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            repo: None,
            read_stdin: false,
            copy: false,
            enhanced_context: false,
            custom_priorities: vec![],
        };

        let options = DigestOptions::from_config(&config).unwrap();
        assert_eq!(options.max_tokens, Some(100000));
        assert!(options.include_tree);
        assert!(options.include_stats);
        assert!(!options.group_by_type); // Default is false according to implementation
    }

    #[test]
    fn test_generate_markdown_structure_headers() {
        let markdown = render(Vec::new(), test_options(true, false));

        // Check that main structure is present even with no files
        assert!(markdown.contains("# Code Digest"));
        assert!(markdown.contains("## Statistics"));
    }

    #[test]
    fn test_enhanced_tree_generation_with_metadata() {
        let files = vec![
            file_info!("src/main.rs", 145, FileType::Rust, 1.5),
            file_info!("src/lib.rs", 89, FileType::Rust, 1.2),
        ];

        let markdown = render(files, test_options(false, true));

        // Should include file sizes and types in tree
        assert!(markdown.contains("main.rs (145 B, Rust)"));
        assert!(markdown.contains("lib.rs (89 B, Rust)"));
    }

    #[test]
    fn test_enhanced_file_headers_with_metadata() {
        let files = vec![file_info!("src/main.rs", 145, FileType::Rust, 1.5)];

        let markdown = render(files, test_options(false, true));

        // Should include metadata in file headers
        assert!(markdown.contains("## src/main.rs (145 B, Rust)"));
    }

    #[test]
    fn test_basic_mode_unchanged() {
        let files = vec![file_info!("src/main.rs", 145, FileType::Rust, 1.5)];

        let markdown = render(files, test_options(false, false));

        // Should NOT include metadata - backward compatibility
        assert!(markdown.contains("## src/main.rs"));
        assert!(!markdown.contains("## src/main.rs (145 B, Rust)"));
        assert!(markdown.contains("main.rs") && !markdown.contains("main.rs (145 B, Rust)"));
    }
}
786        assert!(markdown.contains("main.rs") && !markdown.contains("main.rs (145 B, Rust)"));
787    }
788}