1use crate::core::walker::FileInfo;
4use crate::utils::file_ext::FileType;
5use anyhow::Result;
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
/// Settings that control how `generate_markdown` lays out the digest.
#[derive(Clone, Debug)]
pub struct DigestOptions {
    /// Optional token budget copied from the CLI configuration.
    ///
    /// NOTE(review): none of the rendering code in this file reads this field;
    /// presumably it is enforced elsewhere — confirm against callers.
    pub max_tokens: Option<usize>,
    /// Emit the `## File Structure` section containing the rendered file tree.
    pub include_tree: bool,
    /// Emit the `## Statistics` section.
    pub include_stats: bool,
    /// Place file sections under one `## <Type> Files` heading per file type.
    pub group_by_type: bool,
    /// Order files by descending priority (ties broken by relative path) before the
    /// table of contents and the file sections are written.
    pub sort_by_priority: bool,
    /// Heading written before each file; `{path}` is replaced with the file's relative path.
    pub file_header_template: String,
    /// Heading written at the top of the document; `{directory}` is substituted.
    /// An empty template suppresses the heading entirely.
    pub doc_header_template: String,
    /// Emit the `## Table of Contents` section.
    pub include_toc: bool,
}
30
31impl DigestOptions {
32 pub fn from_config(config: &crate::cli::Config) -> Result<Self> {
34 Ok(DigestOptions {
35 max_tokens: config.max_tokens,
36 include_tree: true,
37 include_stats: true,
38 group_by_type: false,
39 sort_by_priority: true,
40 file_header_template: "## {path}".to_string(),
41 doc_header_template: "# Code Digest: {directory}".to_string(),
42 include_toc: true,
43 })
44 }
45}
46
47impl Default for DigestOptions {
48 fn default() -> Self {
49 DigestOptions {
50 max_tokens: None,
51 include_tree: true,
52 include_stats: true,
53 group_by_type: false,
54 sort_by_priority: true,
55 file_header_template: "## {path}".to_string(),
56 doc_header_template: "# Code Digest: {directory}".to_string(),
57 include_toc: true,
58 }
59 }
60}
61
62pub fn generate_markdown(files: Vec<FileInfo>, options: DigestOptions) -> Result<String> {
64 let mut output = String::new();
65
66 if !options.doc_header_template.is_empty() {
68 let header = options.doc_header_template.replace("{directory}", ".");
69 output.push_str(&header);
70 output.push_str("\n\n");
71 }
72
73 if options.include_stats {
75 let stats = generate_statistics(&files);
76 output.push_str(&stats);
77 output.push_str("\n\n");
78 }
79
80 if options.include_tree {
82 let tree = generate_file_tree(&files);
83 output.push_str("## File Structure\n\n");
84 output.push_str("```\n");
85 output.push_str(&tree);
86 output.push_str("```\n\n");
87 }
88
89 let mut files = files;
91 if options.sort_by_priority {
92 files.sort_by(|a, b| {
93 b.priority
94 .partial_cmp(&a.priority)
95 .unwrap_or(std::cmp::Ordering::Equal)
96 .then_with(|| a.relative_path.cmp(&b.relative_path))
97 });
98 }
99
100 if options.include_toc {
102 output.push_str("## Table of Contents\n\n");
103 for file in &files {
104 let anchor = path_to_anchor(&file.relative_path);
105 output.push_str(&format!(
106 "- [{path}](#{anchor})\n",
107 path = file.relative_path.display(),
108 anchor = anchor
109 ));
110 }
111 output.push('\n');
112 }
113
114 if options.group_by_type {
116 let grouped = group_files_by_type(files);
117 for (file_type, group_files) in grouped {
118 output.push_str(&format!("## {} Files\n\n", file_type_display(&file_type)));
119 for file in group_files {
120 append_file_content(&mut output, &file, &options)?;
121 }
122 }
123 } else {
124 for file in files {
126 append_file_content(&mut output, &file, &options)?;
127 }
128 }
129
130 Ok(output)
131}
132
133fn append_file_content(
135 output: &mut String,
136 file: &FileInfo,
137 options: &DigestOptions,
138) -> Result<()> {
139 let content = match fs::read_to_string(&file.path) {
141 Ok(content) => content,
142 Err(e) => {
143 eprintln!("Warning: Could not read file {}: {}", file.path.display(), e);
144 return Ok(());
145 }
146 };
147
148 let header =
150 options.file_header_template.replace("{path}", &file.relative_path.display().to_string());
151 output.push_str(&header);
152 output.push_str("\n\n");
153
154 let language = get_language_hint(&file.file_type);
156 output.push_str(&format!("```{language}\n"));
157 output.push_str(&content);
158 if !content.ends_with('\n') {
159 output.push('\n');
160 }
161 output.push_str("```\n\n");
162
163 Ok(())
164}
165
166fn generate_statistics(files: &[FileInfo]) -> String {
168 let total_files = files.len();
169 let total_size: u64 = files.iter().map(|f| f.size).sum();
170
171 let mut type_counts: HashMap<FileType, usize> = HashMap::new();
173 for file in files {
174 *type_counts.entry(file.file_type.clone()).or_insert(0) += 1;
175 }
176
177 let mut stats = String::new();
178 stats.push_str("## Statistics\n\n");
179 stats.push_str(&format!("- Total files: {total_files}\n"));
180 stats.push_str(&format!("- Total size: {} bytes\n", format_size(total_size)));
181 stats.push_str("\n### Files by type:\n");
182
183 let mut types: Vec<_> = type_counts.into_iter().collect();
184 types.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
185
186 for (file_type, count) in types {
187 stats.push_str(&format!("- {}: {}\n", file_type_display(&file_type), count));
188 }
189
190 stats
191}
192
193fn generate_file_tree(files: &[FileInfo]) -> String {
195 use std::collections::BTreeMap;
196
197 #[derive(Default)]
198 struct TreeNode {
199 files: Vec<String>,
200 dirs: BTreeMap<String, TreeNode>,
201 }
202
203 let mut root = TreeNode::default();
204
205 for file in files {
207 let parts: Vec<_> = file
208 .relative_path
209 .components()
210 .map(|c| c.as_os_str().to_string_lossy().to_string())
211 .collect();
212
213 let mut current = &mut root;
214 for (i, part) in parts.iter().enumerate() {
215 if i == parts.len() - 1 {
216 current.files.push(part.clone());
218 } else {
219 current = current.dirs.entry(part.clone()).or_default();
221 }
222 }
223 }
224
225 fn render_tree(node: &TreeNode, prefix: &str, _is_last: bool) -> String {
227 let mut output = String::new();
228
229 let dir_count = node.dirs.len();
231 for (i, (name, child)) in node.dirs.iter().enumerate() {
232 let is_last_dir = i == dir_count - 1 && node.files.is_empty();
233 let connector = if is_last_dir { "└── " } else { "├── " };
234 let extension = if is_last_dir { " " } else { "│ " };
235
236 output.push_str(&format!("{prefix}{connector}{name}/\n"));
237 output.push_str(&render_tree(child, &format!("{prefix}{extension}"), is_last_dir));
238 }
239
240 let file_count = node.files.len();
242 for (i, name) in node.files.iter().enumerate() {
243 let is_last_file = i == file_count - 1;
244 let connector = if is_last_file { "└── " } else { "├── " };
245 output.push_str(&format!("{prefix}{connector}{name}\n"));
246 }
247
248 output
249 }
250
251 let mut output = String::new();
252 output.push_str(".\n");
253 output.push_str(&render_tree(&root, "", true));
254 output
255}
256
257fn group_files_by_type(files: Vec<FileInfo>) -> Vec<(FileType, Vec<FileInfo>)> {
259 let mut groups: HashMap<FileType, Vec<FileInfo>> = HashMap::new();
260
261 for file in files {
262 groups.entry(file.file_type.clone()).or_default().push(file);
263 }
264
265 let mut result: Vec<_> = groups.into_iter().collect();
266 result.sort_by_key(|(file_type, _)| file_type_priority(file_type));
267 result
268}
269
270fn file_type_display(file_type: &FileType) -> &'static str {
272 match file_type {
273 FileType::Rust => "Rust",
274 FileType::Python => "Python",
275 FileType::JavaScript => "JavaScript",
276 FileType::TypeScript => "TypeScript",
277 FileType::Go => "Go",
278 FileType::Java => "Java",
279 FileType::Cpp => "C++",
280 FileType::C => "C",
281 FileType::CSharp => "C#",
282 FileType::Ruby => "Ruby",
283 FileType::Php => "PHP",
284 FileType::Swift => "Swift",
285 FileType::Kotlin => "Kotlin",
286 FileType::Scala => "Scala",
287 FileType::Haskell => "Haskell",
288 FileType::Markdown => "Markdown",
289 FileType::Json => "JSON",
290 FileType::Yaml => "YAML",
291 FileType::Toml => "TOML",
292 FileType::Xml => "XML",
293 FileType::Html => "HTML",
294 FileType::Css => "CSS",
295 FileType::Text => "Text",
296 FileType::Other => "Other",
297 }
298}
299
300fn get_language_hint(file_type: &FileType) -> &'static str {
302 match file_type {
303 FileType::Rust => "rust",
304 FileType::Python => "python",
305 FileType::JavaScript => "javascript",
306 FileType::TypeScript => "typescript",
307 FileType::Go => "go",
308 FileType::Java => "java",
309 FileType::Cpp => "cpp",
310 FileType::C => "c",
311 FileType::CSharp => "csharp",
312 FileType::Ruby => "ruby",
313 FileType::Php => "php",
314 FileType::Swift => "swift",
315 FileType::Kotlin => "kotlin",
316 FileType::Scala => "scala",
317 FileType::Haskell => "haskell",
318 FileType::Markdown => "markdown",
319 FileType::Json => "json",
320 FileType::Yaml => "yaml",
321 FileType::Toml => "toml",
322 FileType::Xml => "xml",
323 FileType::Html => "html",
324 FileType::Css => "css",
325 FileType::Text => "text",
326 FileType::Other => "",
327 }
328}
329
330fn file_type_priority(file_type: &FileType) -> u8 {
332 match file_type {
333 FileType::Rust => 1,
334 FileType::Python => 2,
335 FileType::JavaScript => 3,
336 FileType::TypeScript => 3,
337 FileType::Go => 4,
338 FileType::Java => 5,
339 FileType::Cpp => 6,
340 FileType::C => 7,
341 FileType::CSharp => 8,
342 FileType::Ruby => 9,
343 FileType::Php => 10,
344 FileType::Swift => 11,
345 FileType::Kotlin => 12,
346 FileType::Scala => 13,
347 FileType::Haskell => 14,
348 FileType::Markdown => 15,
349 FileType::Json => 16,
350 FileType::Yaml => 17,
351 FileType::Toml => 18,
352 FileType::Xml => 19,
353 FileType::Html => 20,
354 FileType::Css => 21,
355 FileType::Text => 22,
356 FileType::Other => 23,
357 }
358}
359
/// Turns a path into the fragment identifier used by the table of contents.
///
/// Path separators (`/` and `\`), dots and spaces each become `-`; the result is then
/// lower-cased.
fn path_to_anchor(path: &Path) -> String {
    let dashed: String = path
        .display()
        .to_string()
        .chars()
        .map(|ch| match ch {
            '/' | '\\' | '.' | ' ' => '-',
            other => other,
        })
        .collect();
    dashed.to_lowercase()
}
364
/// Formats a byte count using binary (1024-based) units up to GB.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values are printed
/// with two decimals in the largest unit that fits (`"1.50 KB"`). Anything of a gigabyte
/// or more stays in GB.
fn format_size(size: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB"];
    const STEP: f64 = 1024.0;

    let mut value = size as f64;
    let mut unit = 0;

    // At most one division per unit above bytes.
    for _ in 1..UNITS.len() {
        if value < STEP {
            break;
        }
        value /= STEP;
        unit += 1;
    }

    match unit {
        0 => format!("{} {}", value as u64, UNITS[0]),
        _ => format!("{value:.2} {}", UNITS[unit]),
    }
}
382
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // Builds a `FileInfo` whose `path` and `relative_path` are both `$path`.
    macro_rules! file_info {
        ($path:expr, $size:expr, $file_type:expr, $priority:expr) => {
            FileInfo {
                path: PathBuf::from($path),
                relative_path: PathBuf::from($path),
                size: $size,
                file_type: $file_type,
                priority: $priority,
            }
        };
    }

    #[test]
    fn test_format_size() {
        let cases = [
            (512, "512 B"),
            (1024, "1.00 KB"),
            (1536, "1.50 KB"),
            (1_048_576, "1.00 MB"),
        ];
        for &(bytes, expected) in &cases {
            assert_eq!(format_size(bytes), expected);
        }
    }

    #[test]
    fn test_path_to_anchor() {
        let cases = [("src/main.rs", "src-main-rs"), ("test file.txt", "test-file-txt")];
        for &(input, expected) in &cases {
            assert_eq!(path_to_anchor(Path::new(input)), expected);
        }
    }

    #[test]
    fn test_file_type_display() {
        assert_eq!(file_type_display(&FileType::Rust), "Rust");
        assert_eq!(file_type_display(&FileType::Python), "Python");
    }

    #[test]
    fn test_generate_statistics() {
        let files = vec![
            file_info!("test1.rs", 100, FileType::Rust, 1.0),
            file_info!("test2.py", 200, FileType::Python, 0.9),
        ];

        let stats = generate_statistics(&files);
        for needle in ["Total files: 2", "Total size: 300 B", "Rust: 1", "Python: 1"].iter() {
            assert!(stats.contains(needle));
        }
    }

    #[test]
    fn test_generate_statistics_empty() {
        let files: Vec<FileInfo> = Vec::new();
        let stats = generate_statistics(&files);
        assert!(stats.contains("Total files: 0"));
        assert!(stats.contains("Total size: 0 B"));
    }

    #[test]
    fn test_generate_statistics_large_files() {
        let files = vec![
            file_info!("large.rs", 2_000_000, FileType::Rust, 1.0),
            file_info!("huge.py", 50_000_000, FileType::Python, 0.9),
        ];

        let stats = generate_statistics(&files);
        assert!(stats.contains("Total files: 2"));
        // Only checks that the total is reported in megabytes.
        assert!(stats.contains("MB bytes"));
        assert!(stats.contains("Python: 1"));
        assert!(stats.contains("Rust: 1"));
    }

    #[test]
    fn test_generate_file_tree_with_grouping() {
        let files = vec![
            file_info!("src/main.rs", 1000, FileType::Rust, 1.5),
            file_info!("src/lib.rs", 2000, FileType::Rust, 1.2),
            file_info!("tests/test.rs", 500, FileType::Rust, 0.8),
        ];

        let tree = generate_file_tree(&files);
        for needle in ["src/", "tests/", "main.rs", "lib.rs", "test.rs"].iter() {
            assert!(tree.contains(needle));
        }
    }

    #[test]
    fn test_digest_options_from_config() {
        use crate::cli::Config;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let config = Config {
            prompt: None,
            prompt_flag: None,
            directories: vec![temp_dir.path().to_path_buf()],
            directories_positional: vec![],
            output_file: None,
            max_tokens: Some(100_000),
            llm_tool: crate::cli::LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            repo: None,
            read_stdin: false,
        };

        let options = DigestOptions::from_config(&config).unwrap();
        assert_eq!(options.max_tokens, Some(100_000));
        assert!(options.include_tree);
        assert!(options.include_stats);
        assert!(!options.group_by_type);
    }

    #[test]
    fn test_generate_markdown_structure_headers() {
        let options = DigestOptions {
            group_by_type: true,
            doc_header_template: "# Code Digest".to_string(),
            ..DigestOptions::default()
        };

        let markdown = generate_markdown(Vec::new(), options).unwrap();

        assert!(markdown.contains("# Code Digest"));
        assert!(markdown.contains("## Statistics"));
        // With no input files there are no per-type group headings; this substring is
        // found inside the statistics sub-heading "### Files by type:".
        assert!(markdown.contains("## Files"));
    }
}