code_digest/core/
digest.rs

1//! Markdown generation functionality
2
3use crate::core::walker::FileInfo;
4use crate::utils::file_ext::FileType;
5use anyhow::Result;
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
/// Settings that control the layout and contents of the generated markdown digest.
#[derive(Debug, Clone)]
pub struct DigestOptions {
    /// Upper bound on the number of tokens permitted in the output, if any.
    pub max_tokens: Option<usize>,
    /// Whether a "File Structure" tree section is emitted.
    pub include_tree: bool,
    /// Whether a "Statistics" section is emitted.
    pub include_stats: bool,
    /// Whether file contents are emitted in per-file-type groups.
    pub group_by_type: bool,
    /// Whether files are ordered by descending priority before being listed.
    pub sort_by_priority: bool,
    /// Heading template written before each file; `{path}` is substituted
    /// with the file's relative path.
    pub file_header_template: String,
    /// Heading template written at the top of the document; `{directory}` is
    /// substituted when the digest is generated. An empty template disables
    /// the document header.
    pub doc_header_template: String,
    /// Whether a "Table of Contents" section is emitted.
    pub include_toc: bool,
}
30
31impl DigestOptions {
32    /// Create DigestOptions from CLI config
33    pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
34        Ok(DigestOptions {
35            max_tokens: config.max_tokens,
36            include_tree: true,
37            include_stats: true,
38            group_by_type: false,
39            sort_by_priority: true,
40            file_header_template: "## {path}".to_string(),
41            doc_header_template: "# Code Digest: {directory}".to_string(),
42            include_toc: true,
43        })
44    }
45}
46
47impl Default for DigestOptions {
48    fn default() -> Self {
49        DigestOptions {
50            max_tokens: None,
51            include_tree: true,
52            include_stats: true,
53            group_by_type: false,
54            sort_by_priority: true,
55            file_header_template: "## {path}".to_string(),
56            doc_header_template: "# Code Digest: {directory}".to_string(),
57            include_toc: true,
58        }
59    }
60}
61
62/// Generate markdown from a list of files
63pub fn generate_markdown(files: Vec<FileInfo>, options: DigestOptions) -> Result<String> {
64    let mut output = String::new();
65
66    // Add document header
67    if !options.doc_header_template.is_empty() {
68        let header = options.doc_header_template.replace("{directory}", ".");
69        output.push_str(&header);
70        output.push_str("\n\n");
71    }
72
73    // Add statistics if requested
74    if options.include_stats {
75        let stats = generate_statistics(&files);
76        output.push_str(&stats);
77        output.push_str("\n\n");
78    }
79
80    // Add file tree if requested
81    if options.include_tree {
82        let tree = generate_file_tree(&files);
83        output.push_str("## File Structure\n\n");
84        output.push_str("```\n");
85        output.push_str(&tree);
86        output.push_str("```\n\n");
87    }
88
89    // Sort files if requested
90    let mut files = files;
91    if options.sort_by_priority {
92        files.sort_by(|a, b| {
93            b.priority
94                .partial_cmp(&a.priority)
95                .unwrap_or(std::cmp::Ordering::Equal)
96                .then_with(|| a.relative_path.cmp(&b.relative_path))
97        });
98    }
99
100    // Add table of contents if requested
101    if options.include_toc {
102        output.push_str("## Table of Contents\n\n");
103        for file in &files {
104            let anchor = path_to_anchor(&file.relative_path);
105            output.push_str(&format!(
106                "- [{path}](#{anchor})\n",
107                path = file.relative_path.display(),
108                anchor = anchor
109            ));
110        }
111        output.push('\n');
112    }
113
114    // Group files if requested
115    if options.group_by_type {
116        let grouped = group_files_by_type(files);
117        for (file_type, group_files) in grouped {
118            output.push_str(&format!("## {} Files\n\n", file_type_display(&file_type)));
119            for file in group_files {
120                append_file_content(&mut output, &file, &options)?;
121            }
122        }
123    } else {
124        // Add all files
125        for file in files {
126            append_file_content(&mut output, &file, &options)?;
127        }
128    }
129
130    Ok(output)
131}
132
133/// Append a single file's content to the output
134fn append_file_content(
135    output: &mut String,
136    file: &FileInfo,
137    options: &DigestOptions,
138) -> Result<()> {
139    // Read file content
140    let content = match fs::read_to_string(&file.path) {
141        Ok(content) => content,
142        Err(e) => {
143            eprintln!("Warning: Could not read file {}: {}", file.path.display(), e);
144            return Ok(());
145        }
146    };
147
148    // Add file header
149    let header =
150        options.file_header_template.replace("{path}", &file.relative_path.display().to_string());
151    output.push_str(&header);
152    output.push_str("\n\n");
153
154    // Add language hint for syntax highlighting
155    let language = get_language_hint(&file.file_type);
156    output.push_str(&format!("```{language}\n"));
157    output.push_str(&content);
158    if !content.ends_with('\n') {
159        output.push('\n');
160    }
161    output.push_str("```\n\n");
162
163    Ok(())
164}
165
166/// Generate statistics about the files
167fn generate_statistics(files: &[FileInfo]) -> String {
168    let total_files = files.len();
169    let total_size: u64 = files.iter().map(|f| f.size).sum();
170
171    // Count by file type
172    let mut type_counts: HashMap<FileType, usize> = HashMap::new();
173    for file in files {
174        *type_counts.entry(file.file_type.clone()).or_insert(0) += 1;
175    }
176
177    let mut stats = String::new();
178    stats.push_str("## Statistics\n\n");
179    stats.push_str(&format!("- Total files: {total_files}\n"));
180    stats.push_str(&format!("- Total size: {} bytes\n", format_size(total_size)));
181    stats.push_str("\n### Files by type:\n");
182
183    let mut types: Vec<_> = type_counts.into_iter().collect();
184    types.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
185
186    for (file_type, count) in types {
187        stats.push_str(&format!("- {}: {}\n", file_type_display(&file_type), count));
188    }
189
190    stats
191}
192
193/// Generate a file tree representation
194fn generate_file_tree(files: &[FileInfo]) -> String {
195    use std::collections::BTreeMap;
196
197    #[derive(Default)]
198    struct TreeNode {
199        files: Vec<String>,
200        dirs: BTreeMap<String, TreeNode>,
201    }
202
203    let mut root = TreeNode::default();
204
205    // Build tree structure
206    for file in files {
207        let parts: Vec<_> = file
208            .relative_path
209            .components()
210            .map(|c| c.as_os_str().to_string_lossy().to_string())
211            .collect();
212
213        let mut current = &mut root;
214        for (i, part) in parts.iter().enumerate() {
215            if i == parts.len() - 1 {
216                // File
217                current.files.push(part.clone());
218            } else {
219                // Directory
220                current = current.dirs.entry(part.clone()).or_default();
221            }
222        }
223    }
224
225    // Render tree
226    fn render_tree(node: &TreeNode, prefix: &str, _is_last: bool) -> String {
227        let mut output = String::new();
228
229        // Render directories
230        let dir_count = node.dirs.len();
231        for (i, (name, child)) in node.dirs.iter().enumerate() {
232            let is_last_dir = i == dir_count - 1 && node.files.is_empty();
233            let connector = if is_last_dir { "└── " } else { "├── " };
234            let extension = if is_last_dir { "    " } else { "│   " };
235
236            output.push_str(&format!("{prefix}{connector}{name}/\n"));
237            output.push_str(&render_tree(child, &format!("{prefix}{extension}"), is_last_dir));
238        }
239
240        // Render files
241        let file_count = node.files.len();
242        for (i, name) in node.files.iter().enumerate() {
243            let is_last_file = i == file_count - 1;
244            let connector = if is_last_file { "└── " } else { "├── " };
245            output.push_str(&format!("{prefix}{connector}{name}\n"));
246        }
247
248        output
249    }
250
251    let mut output = String::new();
252    output.push_str(".\n");
253    output.push_str(&render_tree(&root, "", true));
254    output
255}
256
257/// Group files by their type
258fn group_files_by_type(files: Vec<FileInfo>) -> Vec<(FileType, Vec<FileInfo>)> {
259    let mut groups: HashMap<FileType, Vec<FileInfo>> = HashMap::new();
260
261    for file in files {
262        groups.entry(file.file_type.clone()).or_default().push(file);
263    }
264
265    let mut result: Vec<_> = groups.into_iter().collect();
266    result.sort_by_key(|(file_type, _)| file_type_priority(file_type));
267    result
268}
269
270/// Get display name for file type
271fn file_type_display(file_type: &FileType) -> &'static str {
272    match file_type {
273        FileType::Rust => "Rust",
274        FileType::Python => "Python",
275        FileType::JavaScript => "JavaScript",
276        FileType::TypeScript => "TypeScript",
277        FileType::Go => "Go",
278        FileType::Java => "Java",
279        FileType::Cpp => "C++",
280        FileType::C => "C",
281        FileType::CSharp => "C#",
282        FileType::Ruby => "Ruby",
283        FileType::Php => "PHP",
284        FileType::Swift => "Swift",
285        FileType::Kotlin => "Kotlin",
286        FileType::Scala => "Scala",
287        FileType::Haskell => "Haskell",
288        FileType::Markdown => "Markdown",
289        FileType::Json => "JSON",
290        FileType::Yaml => "YAML",
291        FileType::Toml => "TOML",
292        FileType::Xml => "XML",
293        FileType::Html => "HTML",
294        FileType::Css => "CSS",
295        FileType::Text => "Text",
296        FileType::Other => "Other",
297    }
298}
299
300/// Get language hint for syntax highlighting
301fn get_language_hint(file_type: &FileType) -> &'static str {
302    match file_type {
303        FileType::Rust => "rust",
304        FileType::Python => "python",
305        FileType::JavaScript => "javascript",
306        FileType::TypeScript => "typescript",
307        FileType::Go => "go",
308        FileType::Java => "java",
309        FileType::Cpp => "cpp",
310        FileType::C => "c",
311        FileType::CSharp => "csharp",
312        FileType::Ruby => "ruby",
313        FileType::Php => "php",
314        FileType::Swift => "swift",
315        FileType::Kotlin => "kotlin",
316        FileType::Scala => "scala",
317        FileType::Haskell => "haskell",
318        FileType::Markdown => "markdown",
319        FileType::Json => "json",
320        FileType::Yaml => "yaml",
321        FileType::Toml => "toml",
322        FileType::Xml => "xml",
323        FileType::Html => "html",
324        FileType::Css => "css",
325        FileType::Text => "text",
326        FileType::Other => "",
327    }
328}
329
330/// Get priority for file type ordering
331fn file_type_priority(file_type: &FileType) -> u8 {
332    match file_type {
333        FileType::Rust => 1,
334        FileType::Python => 2,
335        FileType::JavaScript => 3,
336        FileType::TypeScript => 3,
337        FileType::Go => 4,
338        FileType::Java => 5,
339        FileType::Cpp => 6,
340        FileType::C => 7,
341        FileType::CSharp => 8,
342        FileType::Ruby => 9,
343        FileType::Php => 10,
344        FileType::Swift => 11,
345        FileType::Kotlin => 12,
346        FileType::Scala => 13,
347        FileType::Haskell => 14,
348        FileType::Markdown => 15,
349        FileType::Json => 16,
350        FileType::Yaml => 17,
351        FileType::Toml => 18,
352        FileType::Xml => 19,
353        FileType::Html => 20,
354        FileType::Css => 21,
355        FileType::Text => 22,
356        FileType::Other => 23,
357    }
358}
359
/// Turn a path into a lowercase anchor string by replacing path separators
/// (`/` and `\`), dots and spaces with `-`.
fn path_to_anchor(path: &Path) -> String {
    let dashed: String = path
        .display()
        .to_string()
        .chars()
        .map(|ch| if matches!(ch, '/' | '\\' | '.' | ' ') { '-' } else { ch })
        .collect();
    dashed.to_lowercase()
}
364
/// Format a byte count in human-readable form using 1024-based units.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values
/// are printed with two decimals in the largest fitting unit up to GB
/// (`"1.50 KB"`). A value that would round up to `1024.00` of a unit is
/// promoted to the next unit instead, so `1_048_575` renders as `"1.00 MB"`
/// rather than `"1024.00 KB"`. GB is the largest unit, so very large sizes
/// are expressed as a big GB number.
fn format_size(size: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB"];
    let last_unit = UNITS.len() - 1;

    if size < 1024 {
        return format!("{size} {}", UNITS[0]);
    }

    let mut value = size as f64;
    let mut unit_index = 0;
    while value >= 1024.0 && unit_index < last_unit {
        value /= 1024.0;
        unit_index += 1;
    }

    // Two-decimal rounding can push a value just below the boundary up to
    // 1024.00; express it in the next unit when one is available.
    if unit_index < last_unit && (value * 100.0).round() / 100.0 >= 1024.0 {
        value /= 1024.0;
        unit_index += 1;
    }

    format!("{value:.2} {}", UNITS[unit_index])
}
382
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Sizes below 1 KiB are whole bytes; larger sizes use two decimals.
    #[test]
    fn test_format_size() {
        assert_eq!(format_size(512), "512 B");
        assert_eq!(format_size(1024), "1.00 KB");
        assert_eq!(format_size(1536), "1.50 KB");
        assert_eq!(format_size(1048576), "1.00 MB");
    }

    /// Separators, dots and spaces all become dashes.
    #[test]
    fn test_path_to_anchor() {
        assert_eq!(path_to_anchor(Path::new("src/main.rs")), "src-main-rs");
        assert_eq!(path_to_anchor(Path::new("test file.txt")), "test-file-txt");
    }

    #[test]
    fn test_file_type_display() {
        assert_eq!(file_type_display(&FileType::Rust), "Rust");
        assert_eq!(file_type_display(&FileType::Python), "Python");
    }

    /// Totals and per-type counts are reported for a small mixed set of files.
    #[test]
    fn test_generate_statistics() {
        let files = vec![
            FileInfo {
                path: PathBuf::from("test1.rs"),
                relative_path: PathBuf::from("test1.rs"),
                size: 100,
                file_type: FileType::Rust,
                priority: 1.0,
            },
            FileInfo {
                path: PathBuf::from("test2.py"),
                relative_path: PathBuf::from("test2.py"),
                size: 200,
                file_type: FileType::Python,
                priority: 0.9,
            },
        ];

        let stats = generate_statistics(&files);
        assert!(stats.contains("Total files: 2"));
        assert!(stats.contains("Total size: 300 B"));
        assert!(stats.contains("Rust: 1"));
        assert!(stats.contains("Python: 1"));
    }

    /// An empty file list still produces the totals, both zero.
    #[test]
    fn test_generate_statistics_empty() {
        let files = vec![];
        let stats = generate_statistics(&files);
        assert!(stats.contains("Total files: 0"));
        assert!(stats.contains("Total size: 0 B"));
    }

    /// Large totals are expressed in megabytes.
    #[test]
    fn test_generate_statistics_large_files() {
        let files = vec![
            FileInfo {
                path: PathBuf::from("large.rs"),
                relative_path: PathBuf::from("large.rs"),
                size: 2_000_000, // 2MB
                file_type: FileType::Rust,
                priority: 1.0,
            },
            FileInfo {
                path: PathBuf::from("huge.py"),
                relative_path: PathBuf::from("huge.py"),
                size: 50_000_000, // 50MB
                file_type: FileType::Python,
                priority: 0.9,
            },
        ];

        let stats = generate_statistics(&files);
        assert!(stats.contains("Total files: 2"));
        // The statistics line appends the literal word "bytes" after the
        // already formatted size, hence "MB bytes".
        assert!(stats.contains("MB bytes"));
        assert!(stats.contains("Python: 1"));
        assert!(stats.contains("Rust: 1"));
    }

    /// Directories are listed in sorted order, each followed by its files in
    /// input order, with the matching branch connectors.
    #[test]
    fn test_generate_file_tree_with_grouping() {
        let files = vec![
            FileInfo {
                path: PathBuf::from("src/main.rs"),
                relative_path: PathBuf::from("src/main.rs"),
                size: 1000,
                file_type: FileType::Rust,
                priority: 1.5,
            },
            FileInfo {
                path: PathBuf::from("src/lib.rs"),
                relative_path: PathBuf::from("src/lib.rs"),
                size: 2000,
                file_type: FileType::Rust,
                priority: 1.2,
            },
            FileInfo {
                path: PathBuf::from("tests/test.rs"),
                relative_path: PathBuf::from("tests/test.rs"),
                size: 500,
                file_type: FileType::Rust,
                priority: 0.8,
            },
        ];

        let tree = generate_file_tree(&files);
        assert!(tree.contains("src/"));
        assert!(tree.contains("tests/"));
        assert!(tree.contains("main.rs"));
        assert!(tree.contains("lib.rs"));
        assert!(tree.contains("test.rs"));

        // Pin the exact layout as well, not just the presence of the names.
        let expected = concat!(
            ".\n",
            "├── src/\n",
            "│   ├── main.rs\n",
            "│   └── lib.rs\n",
            "└── tests/\n",
            "    └── test.rs\n",
        );
        assert_eq!(tree, expected);
    }

    /// `from_config` copies `max_tokens` and uses the standard defaults otherwise.
    #[test]
    fn test_digest_options_from_config() {
        use crate::cli::Config;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let config = Config {
            prompt: None,
            prompt_flag: None,
            directories: vec![temp_dir.path().to_path_buf()],
            directories_positional: vec![],
            output_file: None,
            max_tokens: Some(100000),
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            repo: None,
            read_stdin: false,
        };

        let options = DigestOptions::from_config(&config).unwrap();
        assert_eq!(options.max_tokens, Some(100000));
        assert!(options.include_tree);
        assert!(options.include_stats);
        assert!(!options.group_by_type); // Default is false according to implementation
    }

    /// With no input files every enabled section heading is still emitted.
    #[test]
    fn test_generate_markdown_structure_headers() {
        let files = vec![];

        let options = DigestOptions {
            max_tokens: None,
            include_tree: true,
            include_stats: true,
            group_by_type: true,
            sort_by_priority: true,
            file_header_template: "## {path}".to_string(),
            doc_header_template: "# Code Digest".to_string(),
            include_toc: true,
        };

        let markdown = generate_markdown(files, options).unwrap();

        // Check that main structure is present even with no files
        assert!(markdown.contains("# Code Digest"));
        assert!(markdown.contains("## Statistics"));
        assert!(markdown.contains("### Files by type:"));
        // The tree and the table of contents are emitted even when empty.
        assert!(markdown.contains("## File Structure"));
        assert!(markdown.contains("## Table of Contents"));
        // No files means no per-type group headings.
        assert!(!markdown.contains(" Files\n\n"));
    }
}