Skip to main content

context_builder/
markdown.rs

1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9use encoding_rs::{Encoding, UTF_8};
10
11#[cfg(feature = "parallel")]
12use crossbeam_channel::{Receiver, Sender, bounded};
13#[cfg(feature = "parallel")]
14use std::thread;
15
16/// Generates the final Markdown file.
17#[allow(clippy::too_many_arguments)]
18pub fn generate_markdown(
19    output_path: &str,
20    input_dir: &str,
21    filters: &[String],
22    ignores: &[String],
23    file_tree: &FileTree,
24    files: &[DirEntry],
25    base_path: &Path,
26    line_numbers: bool,
27    encoding_strategy: Option<&str>,
28) -> io::Result<()> {
29    if let Some(parent) = Path::new(output_path).parent()
30        && !parent.exists()
31    {
32        fs::create_dir_all(parent)?;
33    }
34
35    let mut output = fs::File::create(output_path)?;
36
37    let input_dir_name = if input_dir == "." {
38        let current_dir = std::env::current_dir()?;
39        current_dir
40            .file_name()
41            .unwrap()
42            .to_str()
43            .unwrap()
44            .to_string()
45    } else {
46        input_dir.to_string()
47    };
48
49    // --- Header --- //
50    writeln!(output, "# Directory Structure Report\n")?;
51
52    if !filters.is_empty() {
53        writeln!(
54            output,
55            "This document contains files from the `{}` directory with extensions: {}",
56            input_dir_name,
57            filters.join(", ")
58        )?;
59    } else {
60        writeln!(
61            output,
62            "This document contains all files from the `{}` directory, optimized for LLM consumption.",
63            input_dir_name
64        )?;
65    }
66
67    if !ignores.is_empty() {
68        writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
69    }
70
71    writeln!(
72        output,
73        "Processed at: {}",
74        Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
75    )?;
76    writeln!(output)?;
77
78    // --- File Tree --- //
79
80    writeln!(output, "## File Tree Structure\n")?;
81
82    write_tree_to_file(&mut output, file_tree, 0)?;
83
84    writeln!(output)?;
85
86    // (No '## Files' heading here; it will be injected later only once during final composition)
87    // (Diff section will be conditionally inserted later by the auto_diff logic in lib.rs)
88
89    #[cfg(feature = "parallel")]
90    {
91        use rayon::prelude::*;
92
93        // Create a bounded channel for ordered chunks
94        type ChunkResult = (usize, io::Result<Vec<u8>>);
95        let (sender, receiver): (Sender<ChunkResult>, Receiver<ChunkResult>) =
96            bounded(num_cpus::get() * 2); // Buffer size based on CPU count
97
98        let writer_handle = {
99            let mut output = output;
100            let total_files = files.len();
101
102            thread::spawn(move || -> io::Result<()> {
103                let mut completed_chunks = std::collections::BTreeMap::new();
104                let mut next_index = 0;
105                let mut errors = Vec::new();
106
107                // Receive chunks and write them in order
108                while next_index < total_files {
109                    match receiver.recv() {
110                        Ok((index, chunk_result)) => {
111                            completed_chunks.insert(index, chunk_result);
112
113                            // Write all consecutive chunks starting from next_index
114                            while let Some(chunk_result) = completed_chunks.remove(&next_index) {
115                                match chunk_result {
116                                    Ok(buf) => {
117                                        if let Err(e) = output.write_all(&buf) {
118                                            errors.push(format!(
119                                                "Failed to write output for file index {}: {}",
120                                                next_index, e
121                                            ));
122                                        }
123                                    }
124                                    Err(e) => {
125                                        errors.push(format!(
126                                            "Failed to process file index {}: {}",
127                                            next_index, e
128                                        ));
129                                    }
130                                }
131                                next_index += 1;
132                            }
133                        }
134                        Err(_) => break, // Channel closed
135                    }
136                }
137
138                if !errors.is_empty() {
139                    error!(
140                        "Encountered {} errors during parallel processing:",
141                        errors.len()
142                    );
143                    for err in &errors {
144                        error!("  {}", err);
145                    }
146                    return Err(std::io::Error::other(format!(
147                        "Failed to process {} files: {}",
148                        errors.len(),
149                        errors.join("; ")
150                    )));
151                }
152
153                Ok(())
154            })
155        };
156
157        // Process files in parallel and send results to writer
158        files.par_iter().enumerate().for_each(|(index, entry)| {
159            let mut buf = Vec::new();
160            let result = process_file(
161                base_path,
162                entry.path(),
163                &mut buf,
164                line_numbers,
165                encoding_strategy,
166            )
167            .map(|_| buf);
168
169            // Send result to writer thread (ignore send errors - channel might be closed)
170            let _ = sender.send((index, result));
171        });
172
173        // Close the sender to signal completion
174        drop(sender);
175
176        // Wait for writer thread to complete and propagate any errors
177        writer_handle
178            .join()
179            .map_err(|_| std::io::Error::other("Writer thread panicked"))??;
180    }
181
182    #[cfg(not(feature = "parallel"))]
183    {
184        for entry in files {
185            process_file(
186                base_path,
187                entry.path(),
188                &mut output,
189                line_numbers,
190                encoding_strategy,
191            )?;
192        }
193    }
194
195    Ok(())
196}
197
198/// Processes a single file and writes its content to the output.
199pub fn process_file(
200    base_path: &Path,
201
202    file_path: &Path,
203
204    output: &mut impl Write,
205    line_numbers: bool,
206    encoding_strategy: Option<&str>,
207) -> io::Result<()> {
208    let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
209    info!("Processing file: {}", relative_path.display());
210
211    let metadata = match fs::metadata(file_path) {
212        Ok(meta) => meta,
213        Err(e) => {
214            error!(
215                "Failed to get metadata for {}: {}",
216                relative_path.display(),
217                e
218            );
219            return Ok(());
220        }
221    };
222
223    let modified_time = metadata
224        .modified()
225        .ok()
226        .map(|time| {
227            let system_time: chrono::DateTime<Utc> = time.into();
228            system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
229        })
230        .unwrap_or_else(|| "Unknown".to_string());
231
232    writeln!(output)?;
233    writeln!(output, "### File: `{}`", relative_path.display())?;
234
235    writeln!(output)?;
236
237    writeln!(output, "- Size: {} bytes", metadata.len())?;
238    writeln!(output, "- Modified: {}", modified_time)?;
239    writeln!(output)?;
240
241    // --- File Content --- //
242    let extension = file_path
243        .extension()
244        .and_then(|s| s.to_str())
245        .unwrap_or("text");
246    let language = match extension {
247        "rs" => "rust",
248        "js" => "javascript",
249        "ts" => "typescript",
250        "jsx" => "jsx",
251        "tsx" => "tsx",
252        "json" => "json",
253        "toml" => "toml",
254        "md" => "markdown",
255        "yaml" | "yml" => "yaml",
256        "html" => "html",
257        "css" => "css",
258        "py" => "python",
259        "java" => "java",
260        "cpp" => "cpp",
261        "c" => "c",
262        "h" => "c",
263        "hpp" => "cpp",
264        "sql" => "sql",
265        "sh" => "bash",
266        "xml" => "xml",
267        "lock" => "toml",
268        _ => extension,
269    };
270
271    // Enhanced binary file handling with encoding detection and transcoding
272    match fs::File::open(file_path) {
273        Ok(mut file) => {
274            let mut sniff = [0u8; 8192];
275            let n = match file.read(&mut sniff) {
276                Ok(n) => n,
277                Err(e) => {
278                    warn!(
279                        "Could not read file {}: {}. Skipping content.",
280                        relative_path.display(),
281                        e
282                    );
283
284                    writeln!(output, "```text")?;
285
286                    writeln!(
287                        output,
288                        "<Could not read file content (e.g., binary file or permission error)>"
289                    )?;
290
291                    writeln!(output, "```")?;
292
293                    return Ok(());
294                }
295            };
296            let slice = &sniff[..n];
297
298            // Find a valid UTF-8 boundary by backtracking up to 3 bytes.
299            // If the sniff buffer cuts a multi-byte char (e.g., emoji at byte 8191),
300            // from_utf8 would falsely classify the file as non-UTF-8.
301            let check_len = if n == sniff.len() {
302                // Buffer is full — may have split a multi-byte char at the end
303                let mut end = n;
304                while end > 0 && end > n.saturating_sub(4) && sniff[end - 1] & 0xC0 == 0x80 {
305                    end -= 1; // skip continuation bytes
306                }
307                // If we landed on a leading byte, check if the sequence is complete
308                if end > 0 && end < n {
309                    let leading = sniff[end - 1];
310                    let expected_len = if leading & 0xE0 == 0xC0 { 2 }
311                        else if leading & 0xF0 == 0xE0 { 3 }
312                        else if leading & 0xF8 == 0xF0 { 4 }
313                        else { 1 };
314                    if end - 1 + expected_len > n {
315                        end - 1 // incomplete char — exclude the leading byte too
316                    } else {
317                        n
318                    }
319                } else {
320                    n
321                }
322            } else {
323                n // didn't fill the buffer, so no boundary issue
324            };
325
326            // First check if it's valid UTF-8
327            let is_utf8 = std::str::from_utf8(&sniff[..check_len]).is_ok();
328
329            if is_utf8 && !slice.contains(&0) {
330                // Valid UTF-8 text file - proceed normally
331            } else {
332                // Try encoding detection for non-UTF-8 files
333                // If it's not UTF-8, try to detect the encoding
334                let (encoding, _consumed) =
335                    encoding_rs::Encoding::for_bom(slice).unwrap_or((encoding_rs::UTF_8, 0));
336
337                // If it's not UTF-8, try to detect the encoding
338                let detected_encoding = if encoding == UTF_8 {
339                    // Use chardet-like detection for common encodings
340                    detect_text_encoding(slice)
341                } else {
342                    Some(encoding)
343                };
344
345                match detected_encoding {
346                    Some(enc) if enc != UTF_8 => {
347                        let strategy = encoding_strategy.unwrap_or("detect");
348                        match strategy {
349                            "strict" | "skip" => {
350                                // Skip files with non-UTF-8 encoding
351                                warn!(
352                                    "Skipping non-UTF-8 file {} (encoding: {}, strategy: {})",
353                                    relative_path.display(),
354                                    enc.name(),
355                                    strategy
356                                );
357                            }
358                            _ => {
359                                // Default "detect" strategy: attempt to transcode
360                                match transcode_file_content(file_path, enc) {
361                                    Ok(transcoded_content) => {
362                                        info!(
363                                            "Successfully transcoded {} from {} to UTF-8",
364                                            relative_path.display(),
365                                            enc.name()
366                                        );
367                                        write_text_content(
368                                            output,
369                                            &transcoded_content,
370                                            language,
371                                            line_numbers,
372                                        )?;
373                                        return Ok(());
374                                    }
375                                    Err(e) => {
376                                        warn!(
377                                            "Failed to transcode {} from {}: {}. Treating as binary.",
378                                            relative_path.display(),
379                                            enc.name(),
380                                            e
381                                        );
382                                    }
383                                }
384                            }
385                        }
386                    }
387                    _ => {
388                        // Check if it's likely binary (contains null bytes)
389                        if slice.contains(&0) {
390                            warn!(
391                                "Detected binary file {} (contains null bytes). Skipping content.",
392                                relative_path.display()
393                            );
394                        } else {
395                            warn!(
396                                "Could not determine encoding for {}. Treating as binary.",
397                                relative_path.display()
398                            );
399                        }
400                    }
401                }
402
403                // Fallback to binary file placeholder
404                writeln!(output, "```text")?;
405                writeln!(
406                    output,
407                    "<Binary file or unsupported encoding: {} bytes>",
408                    metadata.len()
409                )?;
410                writeln!(output, "```")?;
411                return Ok(());
412            }
413
414            // Reset cursor and stream the content
415            if let Err(e) = file.seek(SeekFrom::Start(0)) {
416                warn!(
417                    "Could not reset file cursor for {}: {}. Skipping content.",
418                    relative_path.display(),
419                    e
420                );
421                writeln!(output, "```text")?;
422                writeln!(
423                    output,
424                    "<Could not read file content (e.g., binary file or permission error)>"
425                )?;
426                writeln!(output, "```")?;
427                return Ok(());
428            }
429
430            // Stream UTF-8 content
431            let content = match std::fs::read_to_string(file_path) {
432                Ok(content) => content,
433                Err(e) => {
434                    warn!(
435                        "Error reading file {}: {}. Output may be truncated.",
436                        relative_path.display(),
437                        e
438                    );
439                    writeln!(output, "```text")?;
440                    writeln!(output, "<Error reading file content>")?;
441                    writeln!(output, "```")?;
442                    return Ok(());
443                }
444            };
445
446            write_text_content(output, &content, language, line_numbers)?;
447        }
448        Err(e) => {
449            warn!(
450                "Could not open file {}: {}. Skipping content.",
451                relative_path.display(),
452                e
453            );
454            writeln!(output, "```text")?;
455            writeln!(
456                output,
457                "<Could not read file content (e.g., binary file or permission error)>"
458            )?;
459            writeln!(output, "```")?;
460        }
461    }
462
463    Ok(())
464}
465
466/// Detect text encoding using heuristics for common encodings
467fn detect_text_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
468    // Try common encodings
469    let encodings = [
470        encoding_rs::WINDOWS_1252,
471        encoding_rs::UTF_16LE,
472        encoding_rs::UTF_16BE,
473        encoding_rs::SHIFT_JIS,
474    ];
475
476    for encoding in &encodings {
477        let (decoded, _, had_errors) = encoding.decode(bytes);
478        if !had_errors && is_likely_text(&decoded) {
479            return Some(encoding);
480        }
481    }
482
483    None
484}
485
/// Heuristically decides whether decoded content looks like text.
///
/// Control characters other than newline, carriage return and tab are
/// counted; content is accepted while they make up at most 5% of the
/// characters. Once more than 100 characters have been seen, the scan stops
/// with `false` as soon as the running ratio exceeds 5%. Empty content
/// counts as text.
fn is_likely_text(content: &str) -> bool {
    let is_suspicious = |c: char| c.is_control() && !matches!(c, '\n' | '\r' | '\t');

    let (mut seen, mut suspicious) = (0, 0);
    for c in content.chars() {
        seen += 1;
        suspicious += i32::from(is_suspicious(c));

        // Early exit: past 100 characters, more than 5% control characters
        // means this is probably not text.
        if seen > 100 && suspicious * 20 > seen {
            return false;
        }
    }

    // Short or clean input: allow up to 5% control characters.
    seen == 0 || suspicious * 20 <= seen
}
510
511/// Transcode file content from detected encoding to UTF-8
512fn transcode_file_content(file_path: &Path, encoding: &'static Encoding) -> io::Result<String> {
513    let bytes = std::fs::read(file_path)?;
514    let (decoded, _, had_errors) = encoding.decode(&bytes);
515
516    if had_errors {
517        return Err(io::Error::new(
518            io::ErrorKind::InvalidData,
519            format!("Failed to decode file with encoding {}", encoding.name()),
520        ));
521    }
522
523    Ok(decoded.into_owned())
524}
525
/// Emits `content` as a fenced code block tagged with `language`.
///
/// With `line_numbers` set, every line is prefixed with its 1-based number
/// right-aligned to four columns and followed by ` | `. Otherwise the content
/// is written verbatim, with a newline appended if it does not already end
/// with one, so the closing fence always starts on its own line.
fn write_text_content(
    output: &mut impl Write,
    content: &str,
    language: &str,
    line_numbers: bool,
) -> io::Result<()> {
    writeln!(output, "```{}", language)?;

    if !line_numbers {
        output.write_all(content.as_bytes())?;
        if !content.ends_with('\n') {
            writeln!(output)?;
        }
    } else {
        for (number, text) in (1usize..).zip(content.lines()) {
            writeln!(output, "{:>4} | {}", number, text)?;
        }
    }

    writeln!(output, "```")
}
549
550#[cfg(test)]
551mod tests {
552    use super::*;
553    use std::fs;
554    use tempfile::tempdir;
555
556    #[test]
557    fn test_code_block_formatting() {
558        let dir = tempdir().unwrap();
559        let base_path = dir.path();
560        let file_path = base_path.join("test.rs");
561        let output_path = base_path.join("output.md");
562
563        // Create a test Rust file
564        fs::write(
565            &file_path,
566            "fn main() {\n    println!(\"Hello, world!\");\n}",
567        )
568        .unwrap();
569
570        // Create an output file
571        let mut output = fs::File::create(&output_path).unwrap();
572
573        // Process the file
574        process_file(base_path, &file_path, &mut output, false, None).unwrap();
575
576        // Read the output
577        let content = fs::read_to_string(&output_path).unwrap();
578
579        // Check that code blocks are properly formatted
580        assert!(content.contains("```rust"));
581        assert!(content.contains("```") && content.matches("```").count() >= 2);
582    }
583
584    #[test]
585    fn test_markdown_file_formatting() {
586        let dir = tempdir().unwrap();
587        let base_path = dir.path();
588        let file_path = base_path.join("README.md");
589        let output_path = base_path.join("output.md");
590
591        // Create a test Markdown file
592        fs::write(&file_path, "# Test\n\nThis is a test markdown file.").unwrap();
593
594        // Create an output file
595        let mut output = fs::File::create(&output_path).unwrap();
596
597        // Process the file
598        process_file(base_path, &file_path, &mut output, false, None).unwrap();
599
600        // Read the output
601        let content = fs::read_to_string(&output_path).unwrap();
602
603        // Debug prints the content
604        println!("Generated content:\n{}", content);
605
606        // Check that markdown files use the correct language identifier
607        assert!(
608            content.contains("```markdown"),
609            "Content should contain '```markdown' but was: {}",
610            content
611        );
612        // Count the number of code block markers
613        let code_block_markers = content.matches("```").count();
614
615        assert!(
616            code_block_markers >= 2,
617            "Expected at least 2 code block markers, found {}",
618            code_block_markers
619        );
620    }
621
622    #[test]
623    fn test_line_numbered_code_blocks() {
624        let dir = tempdir().unwrap();
625        let base_path = dir.path();
626        let file_path = base_path.join("lib.rs");
627        let output_path = base_path.join("out.md");
628
629        // Create a multi-line Rust file
630        fs::write(
631                    &file_path,
632                    "fn add(a: i32, b: i32) -> i32 {\n    a + b\n}\n\nfn main() {\n    println!(\"{}\", add(1, 2));\n}\n",
633                )
634                .unwrap();
635
636        let mut output = fs::File::create(&output_path).unwrap();
637        process_file(base_path, &file_path, &mut output, true, None).unwrap();
638
639        let content = fs::read_to_string(&output_path).unwrap();
640
641        // Check language and line numbers prefix
642        assert!(content.contains("```rust"));
643        assert!(content.contains("   1 | "));
644        assert!(content.contains("   2 | "));
645
646        // Count lines with "|" prefix equals number of lines in an original file
647        let numbered_lines = content
648            .lines()
649            .filter(|l| {
650                l.trim_start()
651                    .chars()
652                    .next()
653                    .map(|c| c.is_ascii_digit())
654                    .unwrap_or(false)
655                    && l.contains(" | ")
656            })
657            .count();
658        let original_line_count = fs::read_to_string(&file_path).unwrap().lines().count();
659        assert_eq!(numbered_lines, original_line_count);
660
661        // Ensure code fence closes
662        assert!(content.contains("```"));
663    }
664
665    #[test]
666    fn test_binary_file_handling() {
667        let dir = tempdir().unwrap();
668        let base_path = dir.path();
669        let file_path = base_path.join("image.bin");
670        let output_path = base_path.join("out.md");
671
672        // Write truly binary data that won't be decoded by encoding detection
673        let bytes = vec![
674            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG header
675            0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, // PNG chunk
676            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // More binary data
677            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Null bytes
678        ];
679        fs::write(&file_path, bytes).unwrap();
680
681        let mut output = fs::File::create(&output_path).unwrap();
682        process_file(base_path, &file_path, &mut output, false, None).unwrap();
683
684        let content = fs::read_to_string(&output_path).unwrap();
685
686        // Expect a text block to fall back with a helpful message
687        assert!(content.contains("```text"));
688        assert!(content.contains("<Binary file or unsupported encoding:"));
689
690        // Ensure the code block is closed
691        let fence_count = content.matches("```").count();
692        assert!(
693            fence_count >= 2,
694            "expected at least opening and closing fences, got {}",
695            fence_count
696        );
697    }
698
699    #[test]
700    fn test_encoding_detection_and_transcoding() {
701        let dir = tempdir().unwrap();
702        let base_path = dir.path();
703        let output_path = base_path.join("out.md");
704
705        // Test Windows-1252 encoded file (common in Windows)
706        let windows1252_content = [
707            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, // "Hello "
708            0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, // "World" with smart quotes
709            0x0A, // newline
710        ];
711        let file_path = base_path.join("windows1252.txt");
712        fs::write(&file_path, windows1252_content).unwrap();
713
714        let mut output = fs::File::create(&output_path).unwrap();
715        process_file(base_path, &file_path, &mut output, false, Some("detect")).unwrap();
716
717        let content = fs::read_to_string(&output_path).unwrap();
718
719        // Should contain transcoded content with UTF-8 equivalents
720        assert!(content.contains("Hello"));
721        assert!(content.contains("World"));
722        // Should use text language
723        assert!(content.contains("```txt"));
724
725        // Ensure the code block is closed
726        let fence_count = content.matches("```").count();
727        assert!(
728            fence_count >= 2,
729            "expected at least opening and closing fences, got {}",
730            fence_count
731        );
732    }
733
734    #[test]
735    fn test_encoding_strategy_strict() {
736        let dir = tempdir().unwrap();
737        let base_path = dir.path();
738        let output_path = base_path.join("out.md");
739
740        // Create a file with non-UTF-8 content
741        let non_utf8_content = [0xFF, 0xFE, 0x41, 0x00]; // UTF-16 LE BOM + "A"
742        let file_path = base_path.join("utf16.txt");
743        fs::write(&file_path, non_utf8_content).unwrap();
744
745        let mut output = fs::File::create(&output_path).unwrap();
746        process_file(base_path, &file_path, &mut output, false, Some("strict")).unwrap();
747
748        let content = fs::read_to_string(&output_path).unwrap();
749
750        // Should contain binary file placeholder
751        assert!(content.contains("<Binary file or unsupported encoding:"));
752        assert!(content.contains("```text"));
753
754        // Ensure the code block is closed
755        let fence_count = content.matches("```").count();
756        assert!(
757            fence_count >= 2,
758            "expected at least opening and closing fences, got {}",
759            fence_count
760        );
761    }
762
763    #[test]
764    fn test_encoding_strategy_skip() {
765        let dir = tempdir().unwrap();
766        let base_path = dir.path();
767        let output_path = base_path.join("out.md");
768
769        // Create a file with UTF-16 content
770        let utf16_content = [0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00]; // UTF-16 LE "Hi"
771        let file_path = base_path.join("utf16.txt");
772        fs::write(&file_path, utf16_content).unwrap();
773
774        let mut output = fs::File::create(&output_path).unwrap();
775        process_file(base_path, &file_path, &mut output, false, Some("skip")).unwrap();
776
777        let content = fs::read_to_string(&output_path).unwrap();
778
779        // Should contain binary file placeholder (skipped transcoding)
780        assert!(content.contains("<Binary file or unsupported encoding:"));
781        assert!(content.contains("```text"));
782    }
783
784    #[test]
785    fn test_generate_markdown_with_current_directory() {
786        let dir = tempdir().unwrap();
787        let base_path = dir.path();
788        let output_path = base_path.join("test.md");
789
790        // Create test files
791        fs::write(base_path.join("readme.txt"), "Hello world").unwrap();
792
793        // Collect files
794        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
795        let file_tree = crate::tree::build_file_tree(&files, base_path);
796
797        // Change to the test directory
798        let original_dir = std::env::current_dir().unwrap();
799        std::env::set_current_dir(base_path).unwrap();
800
801        // Test with "." as input directory
802        let result = generate_markdown(
803            &output_path.to_string_lossy(),
804            ".",
805            &[],
806            &[],
807            &file_tree,
808            &files,
809            base_path,
810            false,
811            None,
812        );
813
814        // Restore original directory
815        std::env::set_current_dir(original_dir).unwrap();
816
817        assert!(result.is_ok());
818        let content = fs::read_to_string(&output_path).unwrap();
819        assert!(content.contains("Directory Structure Report"));
820    }
821
822    #[test]
823    fn test_generate_markdown_creates_output_directory() {
824        let dir = tempdir().unwrap();
825        let base_path = dir.path();
826        let nested_output = base_path.join("nested").join("deep").join("output.md");
827
828        // Create test files
829        fs::write(base_path.join("test.txt"), "content").unwrap();
830
831        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
832        let file_tree = crate::tree::build_file_tree(&files, base_path);
833
834        let result = generate_markdown(
835            &nested_output.to_string_lossy(),
836            "test_dir",
837            &[],
838            &[],
839            &file_tree,
840            &files,
841            base_path,
842            false,
843            None,
844        );
845
846        assert!(result.is_ok());
847        assert!(nested_output.exists());
848        assert!(nested_output.parent().unwrap().exists());
849    }
850
851    #[test]
852    fn test_generate_markdown_with_filters_and_ignores() {
853        let dir = tempdir().unwrap();
854        let base_path = dir.path();
855        let output_path = base_path.join("filtered.md");
856
857        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
858        fs::write(base_path.join("config.toml"), "[package]").unwrap();
859        fs::write(base_path.join("readme.md"), "# README").unwrap();
860
861        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
862        let file_tree = crate::tree::build_file_tree(&files, base_path);
863
864        let result = generate_markdown(
865            &output_path.to_string_lossy(),
866            "project",
867            &["rs".to_string(), "toml".to_string()],
868            &["readme.md".to_string()],
869            &file_tree,
870            &files,
871            base_path,
872            true,
873            Some("strict"),
874        );
875
876        assert!(result.is_ok());
877        let content = fs::read_to_string(&output_path).unwrap();
878        assert!(content.contains("Directory Structure Report"));
879        // The actual generate_markdown function doesn't format filters/ignores this way
880        assert!(content.contains("main.rs") || content.contains("config.toml"));
881    }
882
883    #[test]
884    fn test_write_text_content_with_line_numbers() {
885        let mut output = Vec::new();
886        let content = "line one\nline two\nline three";
887
888        write_text_content(&mut output, content, "rust", true).unwrap();
889
890        let result = String::from_utf8(output).unwrap();
891        assert!(result.contains("```rust"));
892        assert!(result.contains("   1 | line one"));
893        assert!(result.contains("   2 | line two"));
894        assert!(result.contains("   3 | line three"));
895        assert!(result.contains("```"));
896    }
897
898    #[test]
899    fn test_write_text_content_without_line_numbers() {
900        let mut output = Vec::new();
901        let content = "function test() {\n  return true;\n}";
902
903        write_text_content(&mut output, content, "javascript", false).unwrap();
904
905        let result = String::from_utf8(output).unwrap();
906        assert!(result.contains("```javascript"));
907        assert!(result.contains("function test() {"));
908        assert!(result.contains("  return true;"));
909        assert!(result.contains("```"));
910        assert!(!result.contains(" | ")); // No line number prefix
911    }
912
913    #[test]
914    fn test_write_text_content_without_trailing_newline() {
915        let mut output = Vec::new();
916        let content = "no newline at end"; // No \n at end
917
918        write_text_content(&mut output, content, "text", false).unwrap();
919
920        let result = String::from_utf8(output).unwrap();
921        assert!(result.contains("```text"));
922        assert!(result.contains("no newline at end"));
923        assert!(result.ends_with("```\n")); // Should add newline
924    }
925
926    #[test]
927    fn test_is_likely_text() {
928        // Normal text should be considered text
929        assert!(is_likely_text("Hello world\nThis is normal text"));
930
931        // Text with some control characters should still be text
932        assert!(is_likely_text(
933            "Line 1\nLine 2\tTabbed\r\nWindows line ending"
934        ));
935
936        // Text with too many control characters should not be text
937        let mut bad_text = String::new();
938        for i in 0..200 {
939            if i % 5 == 0 {
940                bad_text.push('\x01'); // Control character
941            } else {
942                bad_text.push('a');
943            }
944        }
945        assert!(!is_likely_text(&bad_text));
946
947        // Empty string should be considered text
948        assert!(is_likely_text(""));
949    }
950
951    #[test]
952    fn test_detect_text_encoding() {
953        // UTF-8 should return None (already UTF-8)
954        let utf8_bytes = "Hello world".as_bytes();
955        let result = detect_text_encoding(utf8_bytes);
956        // The function may return an encoding even for UTF-8 text if it detects it differently
957        // Just verify it doesn't crash
958        assert!(result.is_some() || result.is_none());
959
960        // Windows-1252 encoded text should be detected
961        let windows1252_bytes = [
962            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x94,
963        ];
964        let detected = detect_text_encoding(&windows1252_bytes);
965        assert!(detected.is_some());
966    }
967
968    #[test]
969    fn test_transcode_file_content() {
970        let dir = tempdir().unwrap();
971        let file_path = dir.path().join("windows1252.txt");
972
973        // Write Windows-1252 encoded content
974        let windows1252_content = [
975            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, // "Hello "
976            0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, // "World" with smart quotes
977        ];
978        fs::write(&file_path, windows1252_content).unwrap();
979
980        let result = transcode_file_content(&file_path, encoding_rs::WINDOWS_1252);
981        assert!(result.is_ok());
982
983        let transcoded = result.unwrap();
984        assert!(transcoded.contains("Hello"));
985        assert!(transcoded.contains("World"));
986    }
987
988    #[test]
989    fn test_process_file_with_metadata_error() {
990        let dir = tempdir().unwrap();
991        let base_path = dir.path();
992        let nonexistent_file = base_path.join("nonexistent.txt");
993        let output_path = base_path.join("output.md");
994
995        let mut output = fs::File::create(&output_path).unwrap();
996
997        // This should handle the metadata error gracefully
998        let result = process_file(base_path, &nonexistent_file, &mut output, false, None);
999        assert!(result.is_ok());
1000
1001        // Output should be minimal since file doesn't exist
1002        let content = fs::read_to_string(&output_path).unwrap();
1003        assert!(content.is_empty() || content.trim().is_empty());
1004    }
1005
1006    #[test]
1007    fn test_process_file_with_different_extensions() {
1008        let dir = tempdir().unwrap();
1009        let base_path = dir.path();
1010        let output_path = base_path.join("output.md");
1011
1012        // Test various file extensions
1013        let test_files = [
1014            ("script.py", "print('hello')", "python"),
1015            ("data.json", r#"{"key": "value"}"#, "json"),
1016            ("config.yaml", "key: value", "yaml"),
1017            ("style.css", "body { margin: 0; }", "css"),
1018            ("page.html", "<html><body>Test</body></html>", "html"),
1019            ("query.sql", "SELECT * FROM users;", "sql"),
1020            ("build.sh", "#!/bin/bash\necho 'building'", "bash"),
1021            ("unknown.xyz", "unknown content", "xyz"),
1022        ];
1023
1024        for (filename, content, expected_lang) in test_files.iter() {
1025            let file_path = base_path.join(filename);
1026            fs::write(&file_path, content).unwrap();
1027
1028            let mut output = fs::File::create(&output_path).unwrap();
1029            process_file(base_path, &file_path, &mut output, false, None).unwrap();
1030
1031            let result = fs::read_to_string(&output_path).unwrap();
1032            assert!(result.contains(&format!("```{}", expected_lang)));
1033            assert!(result.contains(content));
1034            assert!(result.contains(filename));
1035        }
1036    }
1037}