1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, BufRead, BufReader, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9
10#[allow(clippy::too_many_arguments)]
12pub fn generate_markdown(
13 output_path: &str,
14 input_dir: &str,
15 filters: &[String],
16 ignores: &[String],
17 file_tree: &FileTree,
18 files: &[DirEntry],
19 base_path: &Path,
20 line_numbers: bool,
21) -> io::Result<()> {
22 if let Some(parent) = Path::new(output_path).parent()
23 && !parent.exists()
24 {
25 fs::create_dir_all(parent)?;
26 }
27
28 let mut output = fs::File::create(output_path)?;
29
30 let input_dir_name = if input_dir == "." {
31 let current_dir = std::env::current_dir()?;
32 current_dir
33 .file_name()
34 .unwrap()
35 .to_str()
36 .unwrap()
37 .to_string()
38 } else {
39 input_dir.to_string()
40 };
41
42 writeln!(output, "# Directory Structure Report\n")?;
44
45 if !filters.is_empty() {
46 writeln!(
47 output,
48 "This document contains files from the `{}` directory with extensions: {}",
49 input_dir_name,
50 filters.join(", ")
51 )?;
52 } else {
53 writeln!(
54 output,
55 "This document contains all files from the `{}` directory, optimized for LLM consumption.",
56 input_dir_name
57 )?;
58 }
59
60 if !ignores.is_empty() {
61 writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
62 }
63
64 writeln!(
65 output,
66 "Processed at: {}",
67 Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
68 )?;
69 writeln!(output)?;
70
71 writeln!(output, "## File Tree Structure\n")?;
74
75 write_tree_to_file(&mut output, file_tree, 0)?;
76
77 writeln!(output)?;
78
79 #[cfg(feature = "parallel")]
83 {
84 use rayon::prelude::*;
85 let results: Vec<io::Result<Vec<u8>>> = files
86 .par_iter()
87 .map(|entry| {
88 let mut buf = Vec::new();
89 match process_file(base_path, entry.path(), &mut buf, line_numbers) {
90 Ok(()) => Ok(buf),
91 Err(e) => Err(e),
92 }
93 })
94 .collect();
95
96 for chunk in results {
97 match chunk {
98 Ok(buf) => output.write_all(&buf)?,
99 Err(e) => return Err(e),
100 }
101 }
102 }
103
104 #[cfg(not(feature = "parallel"))]
105 {
106 for entry in files {
107 process_file(base_path, entry.path(), &mut output, line_numbers)?;
108 }
109 }
110
111 Ok(())
112}
113
114fn process_file(
116 base_path: &Path,
117
118 file_path: &Path,
119
120 output: &mut impl Write,
121 line_numbers: bool,
122) -> io::Result<()> {
123 let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
124 info!("Processing file: {}", relative_path.display());
125
126 let metadata = match fs::metadata(file_path) {
127 Ok(meta) => meta,
128 Err(e) => {
129 error!(
130 "Failed to get metadata for {}: {}",
131 relative_path.display(),
132 e
133 );
134 return Ok(());
135 }
136 };
137
138 let modified_time = metadata
139 .modified()
140 .ok()
141 .map(|time| {
142 let system_time: chrono::DateTime<Utc> = time.into();
143 system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
144 })
145 .unwrap_or_else(|| "Unknown".to_string());
146
147 writeln!(output)?;
148 writeln!(output, "### File: `{}`", relative_path.display())?;
149
150 writeln!(output)?;
151
152 writeln!(output, "- Size: {} bytes", metadata.len())?;
153 writeln!(output, "- Modified: {}", modified_time)?;
154 writeln!(output)?;
155
156 let extension = file_path
158 .extension()
159 .and_then(|s| s.to_str())
160 .unwrap_or("text");
161 let language = match extension {
162 "rs" => "rust",
163 "js" => "javascript",
164 "ts" => "typescript",
165 "jsx" => "jsx",
166 "tsx" => "tsx",
167 "json" => "json",
168 "toml" => "toml",
169 "md" => "markdown",
170 "yaml" | "yml" => "yaml",
171 "html" => "html",
172 "css" => "css",
173 "py" => "python",
174 "java" => "java",
175 "cpp" => "cpp",
176 "c" => "c",
177 "h" => "c",
178 "hpp" => "cpp",
179 "sql" => "sql",
180 "sh" => "bash",
181 "xml" => "xml",
182 "lock" => "toml",
183 _ => extension,
184 };
185
186 match fs::File::open(file_path) {
189 Ok(mut file) => {
190 let mut sniff = [0u8; 8192];
191 let n = match file.read(&mut sniff) {
192 Ok(n) => n,
193 Err(e) => {
194 warn!(
195 "Could not read file {}: {}. Skipping content.",
196 relative_path.display(),
197 e
198 );
199
200 writeln!(output, "```text")?;
201
202 writeln!(
203 output,
204 "<Could not read file content (e.g., binary file or permission error)>"
205 )?;
206
207 writeln!(output, "```")?;
208
209 return Ok(());
210 }
211 };
212 let slice = &sniff[..n];
213 let is_text = !slice.contains(&0) && std::str::from_utf8(slice).is_ok();
214
215 if !is_text {
216 warn!(
217 "Detected non-text or binary file {}. Skipping content.",
218 relative_path.display()
219 );
220 writeln!(output, "```text")?;
221 writeln!(
222 output,
223 "<Could not read file content (e.g., binary file or permission error)>"
224 )?;
225 writeln!(output, "```")?;
226 return Ok(());
227 }
228
229 if let Err(e) = file.seek(SeekFrom::Start(0)) {
231 warn!(
232 "Could not reset file cursor for {}: {}. Skipping content.",
233 relative_path.display(),
234 e
235 );
236 writeln!(output, "```text")?;
237 writeln!(
238 output,
239 "<Could not read file content (e.g., binary file or permission error)>"
240 )?;
241 writeln!(output, "```")?;
242 return Ok(());
243 }
244
245 writeln!(output, "```{}", language)?;
246 let mut reader = BufReader::new(file);
247
248 if line_numbers {
249 let mut buf = String::new();
250 let mut line_no: usize = 1;
251 loop {
252 buf.clear();
253 match reader.read_line(&mut buf) {
254 Ok(0) => break,
255 Ok(_) => {
256 let line = buf.strip_suffix('\n').unwrap_or(&buf);
258 let line = line.strip_suffix('\r').unwrap_or(line);
260 writeln!(output, "{:>4} | {}", line_no, line)?;
261 line_no += 1;
262 }
263 Err(e) => {
264 warn!(
265 "Error while reading {}: {}. Output may be truncated.",
266 relative_path.display(),
267 e
268 );
269 break;
270 }
271 }
272 }
273 } else {
274 if let Err(e) = std::io::copy(&mut reader, output) {
276 warn!(
277 "Error while streaming {}: {}. Output may be truncated.",
278 relative_path.display(),
279 e
280 );
281 }
282 }
283 writeln!(output, "```")?;
284 }
285 Err(e) => {
286 warn!(
287 "Could not open file {}: {}. Skipping content.",
288 relative_path.display(),
289 e
290 );
291 writeln!(output, "```text")?;
292 writeln!(
293 output,
294 "<Could not read file content (e.g., binary file or permission error)>"
295 )?;
296 writeln!(output, "```")?;
297 }
298 }
299
300 Ok(())
301}
302
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Runs `process_file` over a scratch file and returns the generated report.
    ///
    /// `contents` is written to `source_name` inside a fresh temporary
    /// directory (which also serves as the base path), the section is rendered
    /// into `report_name` in that same directory, and the report is read back.
    fn render_report(
        source_name: &str,
        report_name: &str,
        contents: impl AsRef<[u8]>,
        line_numbers: bool,
    ) -> String {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        let source = root.join(source_name);
        let report = root.join(report_name);

        fs::write(&source, contents).unwrap();

        let mut sink = fs::File::create(&report).unwrap();
        process_file(root, &source, &mut sink, line_numbers).unwrap();

        fs::read_to_string(&report).unwrap()
    }

    /// Counts the code-fence markers found in `report`.
    fn fence_total(report: &str) -> usize {
        report.matches("```").count()
    }

    /// A Rust source file is wrapped in a `rust` fenced block.
    #[test]
    fn test_code_block_formatting() {
        let report = render_report(
            "test.rs",
            "output.md",
            "fn main() {\n println!(\"Hello, world!\");\n}",
            false,
        );

        assert!(report.contains("```rust"));
        assert!(report.contains("```") && fence_total(&report) >= 2);
    }

    /// A Markdown file is wrapped in a `markdown` fenced block.
    #[test]
    fn test_markdown_file_formatting() {
        let report = render_report(
            "README.md",
            "output.md",
            "# Test\n\nThis is a test markdown file.",
            false,
        );

        println!("Generated content:\n{}", report);

        assert!(
            report.contains("```markdown"),
            "Content should contain '```markdown' but was: {}",
            report
        );

        let fences = fence_total(&report);
        assert!(
            fences >= 2,
            "Expected at least 2 code block markers, found {}",
            fences
        );
    }

    /// With line numbers enabled, every source line appears exactly once with
    /// a numeric prefix.
    #[test]
    fn test_line_numbered_code_blocks() {
        let source = "fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\nfn main() {\n println!(\"{}\", add(1, 2));\n}\n";
        let report = render_report("lib.rs", "out.md", source, true);

        assert!(report.contains("```rust"));
        assert!(report.contains(" 1 | "));
        assert!(report.contains(" 2 | "));

        // A numbered line starts (after padding) with a digit and carries the
        // " | " separator.
        let numbered = report
            .lines()
            .filter(|line| {
                line.contains(" | ")
                    && line
                        .trim_start()
                        .starts_with(|c: char| c.is_ascii_digit())
            })
            .count();
        assert_eq!(numbered, source.lines().count());

        assert!(report.contains("```"));
    }

    /// Binary content is replaced by the placeholder block.
    #[test]
    fn test_binary_file_handling() {
        let payload = vec![0u8, 159, 146, 150, 255, 0, 1, 2];
        let report = render_report("image.bin", "out.md", payload, false);

        assert!(report.contains("```text"));
        assert!(
            report
                .contains("<Could not read file content (e.g., binary file or permission error)>")
        );

        let fences = fence_total(&report);
        assert!(
            fences >= 2,
            "expected at least opening and closing fences, got {}",
            fences
        );
    }
}