context_builder/
markdown.rs

1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, BufRead, BufReader, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9
10/// Generates the final Markdown file.
11#[allow(clippy::too_many_arguments)]
12pub fn generate_markdown(
13    output_path: &str,
14    input_dir: &str,
15    filters: &[String],
16    ignores: &[String],
17    file_tree: &FileTree,
18    files: &[DirEntry],
19    base_path: &Path,
20    line_numbers: bool,
21) -> io::Result<()> {
22    if let Some(parent) = Path::new(output_path).parent()
23        && !parent.exists()
24    {
25        fs::create_dir_all(parent)?;
26    }
27
28    let mut output = fs::File::create(output_path)?;
29
30    let input_dir_name = if input_dir == "." {
31        let current_dir = std::env::current_dir()?;
32        current_dir
33            .file_name()
34            .unwrap()
35            .to_str()
36            .unwrap()
37            .to_string()
38    } else {
39        input_dir.to_string()
40    };
41
42    // --- Header --- //
43    writeln!(output, "# Directory Structure Report\n")?;
44
45    if !filters.is_empty() {
46        writeln!(
47            output,
48            "This document contains files from the `{}` directory with extensions: {}",
49            input_dir_name,
50            filters.join(", ")
51        )?;
52    } else {
53        writeln!(
54            output,
55            "This document contains all files from the `{}` directory, optimized for LLM consumption.",
56            input_dir_name
57        )?;
58    }
59
60    if !ignores.is_empty() {
61        writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
62    }
63
64    writeln!(
65        output,
66        "Processed at: {}",
67        Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
68    )?;
69    writeln!(output)?;
70
71    // --- File Tree --- //
72
73    writeln!(output, "## File Tree Structure\n")?;
74
75    write_tree_to_file(&mut output, file_tree, 0)?;
76
77    writeln!(output)?;
78
79    // (No '## Files' heading here; it will be injected later only once during final composition)
80    // (Diff section will be conditionally inserted later by the auto_diff logic in lib.rs)
81
82    #[cfg(feature = "parallel")]
83    {
84        use rayon::prelude::*;
85        let results: Vec<io::Result<Vec<u8>>> = files
86            .par_iter()
87            .map(|entry| {
88                let mut buf = Vec::new();
89                match process_file(base_path, entry.path(), &mut buf, line_numbers) {
90                    Ok(()) => Ok(buf),
91                    Err(e) => Err(e),
92                }
93            })
94            .collect();
95
96        for chunk in results {
97            match chunk {
98                Ok(buf) => output.write_all(&buf)?,
99                Err(e) => return Err(e),
100            }
101        }
102    }
103
104    #[cfg(not(feature = "parallel"))]
105    {
106        for entry in files {
107            process_file(base_path, entry.path(), &mut output, line_numbers)?;
108        }
109    }
110
111    Ok(())
112}
113
114/// Processes a single file and writes its content to the output.
115fn process_file(
116    base_path: &Path,
117
118    file_path: &Path,
119
120    output: &mut impl Write,
121    line_numbers: bool,
122) -> io::Result<()> {
123    let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
124    info!("Processing file: {}", relative_path.display());
125
126    let metadata = match fs::metadata(file_path) {
127        Ok(meta) => meta,
128        Err(e) => {
129            error!(
130                "Failed to get metadata for {}: {}",
131                relative_path.display(),
132                e
133            );
134            return Ok(());
135        }
136    };
137
138    let modified_time = metadata
139        .modified()
140        .ok()
141        .map(|time| {
142            let system_time: chrono::DateTime<Utc> = time.into();
143            system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
144        })
145        .unwrap_or_else(|| "Unknown".to_string());
146
147    writeln!(output)?;
148    writeln!(output, "### File: `{}`", relative_path.display())?;
149
150    writeln!(output)?;
151
152    writeln!(output, "- Size: {} bytes", metadata.len())?;
153    writeln!(output, "- Modified: {}", modified_time)?;
154    writeln!(output)?;
155
156    // --- File Content --- //
157    let extension = file_path
158        .extension()
159        .and_then(|s| s.to_str())
160        .unwrap_or("text");
161    let language = match extension {
162        "rs" => "rust",
163        "js" => "javascript",
164        "ts" => "typescript",
165        "jsx" => "jsx",
166        "tsx" => "tsx",
167        "json" => "json",
168        "toml" => "toml",
169        "md" => "markdown",
170        "yaml" | "yml" => "yaml",
171        "html" => "html",
172        "css" => "css",
173        "py" => "python",
174        "java" => "java",
175        "cpp" => "cpp",
176        "c" => "c",
177        "h" => "c",
178        "hpp" => "cpp",
179        "sql" => "sql",
180        "sh" => "bash",
181        "xml" => "xml",
182        "lock" => "toml",
183        _ => extension,
184    };
185
186    // Stream file content for performance and handle binary files
187    // Peek into the file to determine if it's likely text (UTF-8) without loading an entire file
188    match fs::File::open(file_path) {
189        Ok(mut file) => {
190            let mut sniff = [0u8; 8192];
191            let n = match file.read(&mut sniff) {
192                Ok(n) => n,
193                Err(e) => {
194                    warn!(
195                        "Could not read file {}: {}. Skipping content.",
196                        relative_path.display(),
197                        e
198                    );
199
200                    writeln!(output, "```text")?;
201
202                    writeln!(
203                        output,
204                        "<Could not read file content (e.g., binary file or permission error)>"
205                    )?;
206
207                    writeln!(output, "```")?;
208
209                    return Ok(());
210                }
211            };
212            let slice = &sniff[..n];
213            let is_text = !slice.contains(&0) && std::str::from_utf8(slice).is_ok();
214
215            if !is_text {
216                warn!(
217                    "Detected non-text or binary file {}. Skipping content.",
218                    relative_path.display()
219                );
220                writeln!(output, "```text")?;
221                writeln!(
222                    output,
223                    "<Could not read file content (e.g., binary file or permission error)>"
224                )?;
225                writeln!(output, "```")?;
226                return Ok(());
227            }
228
229            // Reset cursor and stream the content
230            if let Err(e) = file.seek(SeekFrom::Start(0)) {
231                warn!(
232                    "Could not reset file cursor for {}: {}. Skipping content.",
233                    relative_path.display(),
234                    e
235                );
236                writeln!(output, "```text")?;
237                writeln!(
238                    output,
239                    "<Could not read file content (e.g., binary file or permission error)>"
240                )?;
241                writeln!(output, "```")?;
242                return Ok(());
243            }
244
245            writeln!(output, "```{}", language)?;
246            let mut reader = BufReader::new(file);
247
248            if line_numbers {
249                let mut buf = String::new();
250                let mut line_no: usize = 1;
251                loop {
252                    buf.clear();
253                    match reader.read_line(&mut buf) {
254                        Ok(0) => break,
255                        Ok(_) => {
256                            // Trim only trailing newline to avoid doubling
257                            let line = buf.strip_suffix('\n').unwrap_or(&buf);
258                            // Also handle Windows CRLF by trimming trailing '\r'
259                            let line = line.strip_suffix('\r').unwrap_or(line);
260                            writeln!(output, "{:>4} | {}", line_no, line)?;
261                            line_no += 1;
262                        }
263                        Err(e) => {
264                            warn!(
265                                "Error while reading {}: {}. Output may be truncated.",
266                                relative_path.display(),
267                                e
268                            );
269                            break;
270                        }
271                    }
272                }
273            } else {
274                // Fast path: stream bytes to output
275                if let Err(e) = std::io::copy(&mut reader, output) {
276                    warn!(
277                        "Error while streaming {}: {}. Output may be truncated.",
278                        relative_path.display(),
279                        e
280                    );
281                }
282            }
283            writeln!(output, "```")?;
284        }
285        Err(e) => {
286            warn!(
287                "Could not open file {}: {}. Skipping content.",
288                relative_path.display(),
289                e
290            );
291            writeln!(output, "```text")?;
292            writeln!(
293                output,
294                "<Could not read file content (e.g., binary file or permission error)>"
295            )?;
296            writeln!(output, "```")?;
297        }
298    }
299
300    Ok(())
301}
302
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Placeholder text expected in place of unreadable or binary content.
    const PLACEHOLDER: &str =
        "<Could not read file content (e.g., binary file or permission error)>";

    /// Writes `contents` to `file_name` inside a fresh temporary directory,
    /// runs `process_file` on it and returns the generated Markdown.
    fn render(file_name: &str, contents: &[u8], line_numbers: bool) -> String {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join(file_name);
        fs::write(&file_path, contents).unwrap();

        // An in-memory buffer is enough: `process_file` only needs `Write`.
        let mut output = Vec::new();
        process_file(base_path, &file_path, &mut output, line_numbers).unwrap();
        String::from_utf8(output).unwrap()
    }

    /// Number of code fence markers in `content`.
    fn fence_count(content: &str) -> usize {
        content.matches("```").count()
    }

    #[test]
    fn test_code_block_formatting() {
        let content = render(
            "test.rs",
            b"fn main() {\n    println!(\"Hello, world!\");\n}",
            false,
        );

        // The block opens with the Rust tag and is closed again.
        assert!(content.contains("```rust"));
        assert!(fence_count(&content) >= 2);
    }

    #[test]
    fn test_markdown_file_formatting() {
        let content = render(
            "README.md",
            b"# Test\n\nThis is a test markdown file.",
            false,
        );

        // Markdown files must use the `markdown` language identifier.
        assert!(
            content.contains("```markdown"),
            "Content should contain '```markdown' but was: {}",
            content
        );

        let code_block_markers = fence_count(&content);
        assert!(
            code_block_markers >= 2,
            "Expected at least 2 code block markers, found {}",
            code_block_markers
        );
    }

    #[test]
    fn test_line_numbered_code_blocks() {
        let source = "fn add(a: i32, b: i32) -> i32 {\n    a + b\n}\n\nfn main() {\n    println!(\"{}\", add(1, 2));\n}\n";
        let content = render("lib.rs", source.as_bytes(), true);

        // Check language and line numbers prefix
        assert!(content.contains("```rust"));
        assert!(content.contains("   1 | "));
        assert!(content.contains("   2 | "));

        // Every line of the original file must show up as a numbered line.
        let numbered_lines = content
            .lines()
            .filter(|l| {
                l.trim_start()
                    .chars()
                    .next()
                    .map(|c| c.is_ascii_digit())
                    .unwrap_or(false)
                    && l.contains(" | ")
            })
            .count();
        assert_eq!(numbered_lines, source.lines().count());

        // The opening fence alone must not satisfy this check: require a
        // second marker and that the output actually ends with the fence.
        assert!(fence_count(&content) >= 2);
        assert!(content.trim_end().ends_with("```"));
    }

    #[test]
    fn test_binary_file_handling() {
        // NUL bytes and invalid UTF-8 sequences mark the file as binary.
        let content = render("image.bin", &[0u8, 159, 146, 150, 255, 0, 1, 2], false);

        // Expect a text block to fall back with a helpful message
        assert!(content.contains("```text"));
        assert!(content.contains(PLACEHOLDER));

        // Ensure the code block is closed
        let fences = fence_count(&content);
        assert!(
            fences >= 2,
            "expected at least opening and closing fences, got {}",
            fences
        );
    }
}