context_builder/
markdown.rs

1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, BufRead, BufReader, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9
10/// Generates the final Markdown file.
11#[allow(clippy::too_many_arguments)]
12pub fn generate_markdown(
13    output_path: &str,
14    input_dir: &str,
15    filters: &[String],
16    ignores: &[String],
17    file_tree: &FileTree,
18    files: &[DirEntry],
19    base_path: &Path,
20    line_numbers: bool,
21) -> io::Result<()> {
22    let mut output = fs::File::create(output_path)?;
23
24    // --- Header --- //
25    writeln!(output, "# Directory Structure Report\n")?;
26
27    if !filters.is_empty() {
28        writeln!(
29            output,
30            "This document contains files from the `{}` directory with extensions: {}",
31            input_dir,
32            filters.join(", ")
33        )?;
34    } else {
35        writeln!(
36            output,
37            "This document contains all files from the `{}` directory, optimized for LLM consumption.",
38            input_dir
39        )?;
40    }
41
42    if !ignores.is_empty() {
43        writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
44    }
45
46    writeln!(
47        output,
48        "Processed at: {}",
49        Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
50    )?;
51    writeln!(output)?;
52
53    // --- File Tree --- //
54    writeln!(output, "## File Tree Structure\n")?;
55    write_tree_to_file(&mut output, file_tree, 0)?;
56
57    // --- File Contents --- //
58
59    #[cfg(feature = "parallel")]
60    {
61        use rayon::prelude::*;
62        let results: Vec<io::Result<Vec<u8>>> = files
63            .par_iter()
64            .map(|entry| {
65                let mut buf = Vec::new();
66                match process_file(base_path, entry.path(), &mut buf, line_numbers) {
67                    Ok(()) => Ok(buf),
68                    Err(e) => Err(e),
69                }
70            })
71            .collect();
72
73        for chunk in results {
74            match chunk {
75                Ok(buf) => output.write_all(&buf)?,
76                Err(e) => return Err(e),
77            }
78        }
79    }
80
81    #[cfg(not(feature = "parallel"))]
82    {
83        for entry in files {
84            process_file(base_path, entry.path(), &mut output, line_numbers)?;
85        }
86    }
87
88    Ok(())
89}
90
91/// Processes a single file and writes its content to the output.
92fn process_file(
93    base_path: &Path,
94
95    file_path: &Path,
96
97    output: &mut impl Write,
98    line_numbers: bool,
99) -> io::Result<()> {
100    let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
101    info!("Processing file: {}", relative_path.display());
102
103    let metadata = match fs::metadata(file_path) {
104        Ok(meta) => meta,
105        Err(e) => {
106            error!(
107                "Failed to get metadata for {}: {}",
108                relative_path.display(),
109                e
110            );
111            return Ok(());
112        }
113    };
114
115    let modified_time = metadata
116        .modified()
117        .ok()
118        .map(|time| {
119            let system_time: chrono::DateTime<Utc> = time.into();
120            system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
121        })
122        .unwrap_or_else(|| "Unknown".to_string());
123
124    // --- File Header --- //
125    writeln!(output)?;
126    writeln!(output, "## File: `{}`", relative_path.display())?;
127    writeln!(output)?;
128    writeln!(output, "- Size: {} bytes", metadata.len())?;
129    writeln!(output, "- Modified: {}", modified_time)?;
130    writeln!(output)?;
131
132    // --- File Content --- //
133    let extension = file_path
134        .extension()
135        .and_then(|s| s.to_str())
136        .unwrap_or("text");
137    let language = match extension {
138        "rs" => "rust",
139        "js" => "javascript",
140        "ts" => "typescript",
141        "jsx" => "jsx",
142        "tsx" => "tsx",
143        "json" => "json",
144        "toml" => "toml",
145        "md" => "markdown",
146        "yaml" | "yml" => "yaml",
147        "html" => "html",
148        "css" => "css",
149        "py" => "python",
150        "java" => "java",
151        "cpp" => "cpp",
152        "c" => "c",
153        "h" => "c",
154        "hpp" => "cpp",
155        "sql" => "sql",
156        "sh" => "bash",
157        "xml" => "xml",
158        "lock" => "toml",
159        _ => extension,
160    };
161
162    // Stream file content for performance and handle binary files
163    // Peek into the file to determine if it's likely text (UTF-8) without loading an entire file
164    match fs::File::open(file_path) {
165        Ok(mut file) => {
166            let mut sniff = [0u8; 8192];
167            let n = match file.read(&mut sniff) {
168                Ok(n) => n,
169                Err(e) => {
170                    warn!(
171                        "Could not read file {}: {}. Skipping content.",
172                        relative_path.display(),
173                        e
174                    );
175
176                    writeln!(output, "```text")?;
177
178                    writeln!(
179                        output,
180                        "<Could not read file content (e.g., binary file or permission error)>"
181                    )?;
182
183                    writeln!(output, "```")?;
184
185                    return Ok(());
186                }
187            };
188            let slice = &sniff[..n];
189            let is_text = !slice.contains(&0) && std::str::from_utf8(slice).is_ok();
190
191            if !is_text {
192                warn!(
193                    "Detected non-text or binary file {}. Skipping content.",
194                    relative_path.display()
195                );
196                writeln!(output, "```text")?;
197                writeln!(
198                    output,
199                    "<Could not read file content (e.g., binary file or permission error)>"
200                )?;
201                writeln!(output, "```")?;
202                return Ok(());
203            }
204
205            // Reset cursor and stream the content
206            if let Err(e) = file.seek(SeekFrom::Start(0)) {
207                warn!(
208                    "Could not reset file cursor for {}: {}. Skipping content.",
209                    relative_path.display(),
210                    e
211                );
212                writeln!(output, "```text")?;
213                writeln!(
214                    output,
215                    "<Could not read file content (e.g., binary file or permission error)>"
216                )?;
217                writeln!(output, "```")?;
218                return Ok(());
219            }
220
221            writeln!(output, "```{}", language)?;
222            let mut reader = BufReader::new(file);
223
224            if line_numbers {
225                let mut buf = String::new();
226                let mut line_no: usize = 1;
227                loop {
228                    buf.clear();
229                    match reader.read_line(&mut buf) {
230                        Ok(0) => break,
231                        Ok(_) => {
232                            // Trim only trailing newline to avoid doubling
233                            let line = buf.strip_suffix('\n').unwrap_or(&buf);
234                            // Also handle Windows CRLF by trimming trailing '\r'
235                            let line = line.strip_suffix('\r').unwrap_or(line);
236                            writeln!(output, "{:>4} | {}", line_no, line)?;
237                            line_no += 1;
238                        }
239                        Err(e) => {
240                            warn!(
241                                "Error while reading {}: {}. Output may be truncated.",
242                                relative_path.display(),
243                                e
244                            );
245                            break;
246                        }
247                    }
248                }
249            } else {
250                // Fast path: stream bytes to output
251                if let Err(e) = std::io::copy(&mut reader, output) {
252                    warn!(
253                        "Error while streaming {}: {}. Output may be truncated.",
254                        relative_path.display(),
255                        e
256                    );
257                }
258            }
259            writeln!(output, "```")?;
260        }
261        Err(e) => {
262            warn!(
263                "Could not open file {}: {}. Skipping content.",
264                relative_path.display(),
265                e
266            );
267            writeln!(output, "```text")?;
268            writeln!(
269                output,
270                "<Could not read file content (e.g., binary file or permission error)>"
271            )?;
272            writeln!(output, "```")?;
273        }
274    }
275
276    Ok(())
277}
278
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `contents` to `file_name` inside a fresh temporary directory,
    /// runs `process_file` on it and returns the generated Markdown.
    fn render(file_name: &str, contents: &[u8], line_numbers: bool) -> String {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join(file_name);
        fs::write(&file_path, contents).unwrap();

        // `process_file` accepts any writer, so the output is captured in memory.
        let mut output = Vec::new();
        process_file(base_path, &file_path, &mut output, line_numbers).unwrap();
        String::from_utf8(output).unwrap()
    }

    /// Asserts that `content` holds exactly one opening and one closing fence.
    fn assert_single_code_block(content: &str) {
        let fence_count = content.matches("```").count();
        assert_eq!(
            fence_count, 2,
            "expected exactly one opening and one closing fence in: {}",
            content
        );
        assert!(
            content.trim_end().ends_with("```"),
            "output should end with the closing fence but was: {}",
            content
        );
    }

    #[test]
    fn test_code_block_formatting() {
        let content = render(
            "test.rs",
            b"fn main() {\n    println!(\"Hello, world!\");\n}",
            false,
        );

        // Check that code blocks are properly formatted
        assert!(content.contains("```rust"));
        assert_single_code_block(&content);
    }

    #[test]
    fn test_markdown_file_formatting() {
        let content = render(
            "README.md",
            b"# Test\n\nThis is a test markdown file.",
            false,
        );

        // Check that markdown files use the correct language identifier
        assert!(
            content.contains("```markdown"),
            "Content should contain '```markdown' but was: {}",
            content
        );
        assert_single_code_block(&content);
    }

    #[test]
    fn test_line_numbered_code_blocks() {
        let source = "fn add(a: i32, b: i32) -> i32 {\n    a + b\n}\n\nfn main() {\n    println!(\"{}\", add(1, 2));\n}\n";
        let content = render("lib.rs", source.as_bytes(), true);

        // Check language and line numbers prefix
        assert!(content.contains("```rust"));
        assert!(content.contains("   1 | "));
        assert!(content.contains("   2 | "));

        // The number of "<n> | " lines must equal the number of lines in the
        // original file
        let numbered_lines = content
            .lines()
            .filter(|l| {
                l.trim_start()
                    .chars()
                    .next()
                    .map(|c| c.is_ascii_digit())
                    .unwrap_or(false)
                    && l.contains(" | ")
            })
            .count();
        assert_eq!(numbered_lines, source.lines().count());

        // Ensure the code fence closes, and does so on a line of its own
        assert_single_code_block(&content);
        assert!(content.trim_end().ends_with("\n```"));
    }

    #[test]
    fn test_binary_file_handling() {
        // Some non-UTF8 bytes, including NULs
        let bytes = [0u8, 159, 146, 150, 255, 0, 1, 2];
        let content = render("image.bin", &bytes, false);

        // Expect a text block to fall back with a helpful message
        assert!(content.contains("```text"));
        assert!(
            content
                .contains("<Could not read file content (e.g., binary file or permission error)>")
        );

        // Ensure the code block is closed
        assert_single_code_block(&content);
    }
}