context_builder/
markdown.rs

1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9use encoding_rs::{Encoding, UTF_8};
10
11#[cfg(feature = "parallel")]
12use crossbeam_channel::{Receiver, Sender, bounded};
13#[cfg(feature = "parallel")]
14use std::thread;
15
16/// Generates the final Markdown file.
17#[allow(clippy::too_many_arguments)]
18pub fn generate_markdown(
19    output_path: &str,
20    input_dir: &str,
21    filters: &[String],
22    ignores: &[String],
23    file_tree: &FileTree,
24    files: &[DirEntry],
25    base_path: &Path,
26    line_numbers: bool,
27    encoding_strategy: Option<&str>,
28) -> io::Result<()> {
29    if let Some(parent) = Path::new(output_path).parent()
30        && !parent.exists()
31    {
32        fs::create_dir_all(parent)?;
33    }
34
35    let mut output = fs::File::create(output_path)?;
36
37    let input_dir_name = if input_dir == "." {
38        let current_dir = std::env::current_dir()?;
39        current_dir
40            .file_name()
41            .unwrap()
42            .to_str()
43            .unwrap()
44            .to_string()
45    } else {
46        input_dir.to_string()
47    };
48
49    // --- Header --- //
50    writeln!(output, "# Directory Structure Report\n")?;
51
52    if !filters.is_empty() {
53        writeln!(
54            output,
55            "This document contains files from the `{}` directory with extensions: {}",
56            input_dir_name,
57            filters.join(", ")
58        )?;
59    } else {
60        writeln!(
61            output,
62            "This document contains all files from the `{}` directory, optimized for LLM consumption.",
63            input_dir_name
64        )?;
65    }
66
67    if !ignores.is_empty() {
68        writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
69    }
70
71    writeln!(
72        output,
73        "Processed at: {}",
74        Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
75    )?;
76    writeln!(output)?;
77
78    // --- File Tree --- //
79
80    writeln!(output, "## File Tree Structure\n")?;
81
82    write_tree_to_file(&mut output, file_tree, 0)?;
83
84    writeln!(output)?;
85
86    // (No '## Files' heading here; it will be injected later only once during final composition)
87    // (Diff section will be conditionally inserted later by the auto_diff logic in lib.rs)
88
89    #[cfg(feature = "parallel")]
90    {
91        use rayon::prelude::*;
92
93        // Create a bounded channel for ordered chunks
94        type ChunkResult = (usize, io::Result<Vec<u8>>);
95        let (sender, receiver): (Sender<ChunkResult>, Receiver<ChunkResult>) =
96            bounded(num_cpus::get() * 2); // Buffer size based on CPU count
97
98        let writer_handle = {
99            let mut output = output;
100            let total_files = files.len();
101
102            thread::spawn(move || -> io::Result<()> {
103                let mut completed_chunks = std::collections::BTreeMap::new();
104                let mut next_index = 0;
105                let mut errors = Vec::new();
106
107                // Receive chunks and write them in order
108                while next_index < total_files {
109                    match receiver.recv() {
110                        Ok((index, chunk_result)) => {
111                            completed_chunks.insert(index, chunk_result);
112
113                            // Write all consecutive chunks starting from next_index
114                            while let Some(chunk_result) = completed_chunks.remove(&next_index) {
115                                match chunk_result {
116                                    Ok(buf) => {
117                                        if let Err(e) = output.write_all(&buf) {
118                                            errors.push(format!(
119                                                "Failed to write output for file index {}: {}",
120                                                next_index, e
121                                            ));
122                                        }
123                                    }
124                                    Err(e) => {
125                                        errors.push(format!(
126                                            "Failed to process file index {}: {}",
127                                            next_index, e
128                                        ));
129                                    }
130                                }
131                                next_index += 1;
132                            }
133                        }
134                        Err(_) => break, // Channel closed
135                    }
136                }
137
138                if !errors.is_empty() {
139                    error!(
140                        "Encountered {} errors during parallel processing:",
141                        errors.len()
142                    );
143                    for err in &errors {
144                        error!("  {}", err);
145                    }
146                    return Err(std::io::Error::other(format!(
147                        "Failed to process {} files: {}",
148                        errors.len(),
149                        errors.join("; ")
150                    )));
151                }
152
153                Ok(())
154            })
155        };
156
157        // Process files in parallel and send results to writer
158        files.par_iter().enumerate().for_each(|(index, entry)| {
159            let mut buf = Vec::new();
160            let result = process_file(
161                base_path,
162                entry.path(),
163                &mut buf,
164                line_numbers,
165                encoding_strategy,
166            )
167            .map(|_| buf);
168
169            // Send result to writer thread (ignore send errors - channel might be closed)
170            let _ = sender.send((index, result));
171        });
172
173        // Close the sender to signal completion
174        drop(sender);
175
176        // Wait for writer thread to complete and propagate any errors
177        writer_handle
178            .join()
179            .map_err(|_| std::io::Error::other("Writer thread panicked"))??;
180    }
181
182    #[cfg(not(feature = "parallel"))]
183    {
184        for entry in files {
185            process_file(
186                base_path,
187                entry.path(),
188                &mut output,
189                line_numbers,
190                encoding_strategy,
191            )?;
192        }
193    }
194
195    Ok(())
196}
197
198/// Processes a single file and writes its content to the output.
199pub fn process_file(
200    base_path: &Path,
201
202    file_path: &Path,
203
204    output: &mut impl Write,
205    line_numbers: bool,
206    encoding_strategy: Option<&str>,
207) -> io::Result<()> {
208    let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
209    info!("Processing file: {}", relative_path.display());
210
211    let metadata = match fs::metadata(file_path) {
212        Ok(meta) => meta,
213        Err(e) => {
214            error!(
215                "Failed to get metadata for {}: {}",
216                relative_path.display(),
217                e
218            );
219            return Ok(());
220        }
221    };
222
223    let modified_time = metadata
224        .modified()
225        .ok()
226        .map(|time| {
227            let system_time: chrono::DateTime<Utc> = time.into();
228            system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
229        })
230        .unwrap_or_else(|| "Unknown".to_string());
231
232    writeln!(output)?;
233    writeln!(output, "### File: `{}`", relative_path.display())?;
234
235    writeln!(output)?;
236
237    writeln!(output, "- Size: {} bytes", metadata.len())?;
238    writeln!(output, "- Modified: {}", modified_time)?;
239    writeln!(output)?;
240
241    // --- File Content --- //
242    let extension = file_path
243        .extension()
244        .and_then(|s| s.to_str())
245        .unwrap_or("text");
246    let language = match extension {
247        "rs" => "rust",
248        "js" => "javascript",
249        "ts" => "typescript",
250        "jsx" => "jsx",
251        "tsx" => "tsx",
252        "json" => "json",
253        "toml" => "toml",
254        "md" => "markdown",
255        "yaml" | "yml" => "yaml",
256        "html" => "html",
257        "css" => "css",
258        "py" => "python",
259        "java" => "java",
260        "cpp" => "cpp",
261        "c" => "c",
262        "h" => "c",
263        "hpp" => "cpp",
264        "sql" => "sql",
265        "sh" => "bash",
266        "xml" => "xml",
267        "lock" => "toml",
268        _ => extension,
269    };
270
271    // Enhanced binary file handling with encoding detection and transcoding
272    match fs::File::open(file_path) {
273        Ok(mut file) => {
274            let mut sniff = [0u8; 8192];
275            let n = match file.read(&mut sniff) {
276                Ok(n) => n,
277                Err(e) => {
278                    warn!(
279                        "Could not read file {}: {}. Skipping content.",
280                        relative_path.display(),
281                        e
282                    );
283
284                    writeln!(output, "```text")?;
285
286                    writeln!(
287                        output,
288                        "<Could not read file content (e.g., binary file or permission error)>"
289                    )?;
290
291                    writeln!(output, "```")?;
292
293                    return Ok(());
294                }
295            };
296            let slice = &sniff[..n];
297
298            // First check if it's valid UTF-8
299            let is_utf8 = std::str::from_utf8(slice).is_ok();
300
301            if is_utf8 && !slice.contains(&0) {
302                // Valid UTF-8 text file - proceed normally
303            } else {
304                // Try encoding detection for non-UTF-8 files
305                // If it's not UTF-8, try to detect the encoding
306                let (encoding, _consumed) =
307                    encoding_rs::Encoding::for_bom(slice).unwrap_or((encoding_rs::UTF_8, 0));
308
309                // If it's not UTF-8, try to detect the encoding
310                let detected_encoding = if encoding == UTF_8 {
311                    // Use chardet-like detection for common encodings
312                    detect_text_encoding(slice)
313                } else {
314                    Some(encoding)
315                };
316
317                match detected_encoding {
318                    Some(enc) if enc != UTF_8 => {
319                        let strategy = encoding_strategy.unwrap_or("detect");
320                        match strategy {
321                            "strict" | "skip" => {
322                                // Skip files with non-UTF-8 encoding
323                                warn!(
324                                    "Skipping non-UTF-8 file {} (encoding: {}, strategy: {})",
325                                    relative_path.display(),
326                                    enc.name(),
327                                    strategy
328                                );
329                            }
330                            _ => {
331                                // Default "detect" strategy: attempt to transcode
332                                match transcode_file_content(file_path, enc) {
333                                    Ok(transcoded_content) => {
334                                        info!(
335                                            "Successfully transcoded {} from {} to UTF-8",
336                                            relative_path.display(),
337                                            enc.name()
338                                        );
339                                        write_text_content(
340                                            output,
341                                            &transcoded_content,
342                                            language,
343                                            line_numbers,
344                                        )?;
345                                        return Ok(());
346                                    }
347                                    Err(e) => {
348                                        warn!(
349                                            "Failed to transcode {} from {}: {}. Treating as binary.",
350                                            relative_path.display(),
351                                            enc.name(),
352                                            e
353                                        );
354                                    }
355                                }
356                            }
357                        }
358                    }
359                    _ => {
360                        // Check if it's likely binary (contains null bytes)
361                        if slice.contains(&0) {
362                            warn!(
363                                "Detected binary file {} (contains null bytes). Skipping content.",
364                                relative_path.display()
365                            );
366                        } else {
367                            warn!(
368                                "Could not determine encoding for {}. Treating as binary.",
369                                relative_path.display()
370                            );
371                        }
372                    }
373                }
374
375                // Fallback to binary file placeholder
376                writeln!(output, "```text")?;
377                writeln!(
378                    output,
379                    "<Binary file or unsupported encoding: {} bytes>",
380                    metadata.len()
381                )?;
382                writeln!(output, "```")?;
383                return Ok(());
384            }
385
386            // Reset cursor and stream the content
387            if let Err(e) = file.seek(SeekFrom::Start(0)) {
388                warn!(
389                    "Could not reset file cursor for {}: {}. Skipping content.",
390                    relative_path.display(),
391                    e
392                );
393                writeln!(output, "```text")?;
394                writeln!(
395                    output,
396                    "<Could not read file content (e.g., binary file or permission error)>"
397                )?;
398                writeln!(output, "```")?;
399                return Ok(());
400            }
401
402            // Stream UTF-8 content
403            if let Err(e) = file.seek(SeekFrom::Start(0)) {
404                warn!(
405                    "Could not reset file cursor for {}: {}. Skipping content.",
406                    relative_path.display(),
407                    e
408                );
409                writeln!(output, "```text")?;
410                writeln!(
411                    output,
412                    "<Could not read file content (e.g., binary file or permission error)>"
413                )?;
414                writeln!(output, "```")?;
415                return Ok(());
416            }
417
418            let content = match std::fs::read_to_string(file_path) {
419                Ok(content) => content,
420                Err(e) => {
421                    warn!(
422                        "Error reading file {}: {}. Output may be truncated.",
423                        relative_path.display(),
424                        e
425                    );
426                    writeln!(output, "```text")?;
427                    writeln!(output, "<Error reading file content>")?;
428                    writeln!(output, "```")?;
429                    return Ok(());
430                }
431            };
432
433            write_text_content(output, &content, language, line_numbers)?;
434        }
435        Err(e) => {
436            warn!(
437                "Could not open file {}: {}. Skipping content.",
438                relative_path.display(),
439                e
440            );
441            writeln!(output, "```text")?;
442            writeln!(
443                output,
444                "<Could not read file content (e.g., binary file or permission error)>"
445            )?;
446            writeln!(output, "```")?;
447        }
448    }
449
450    Ok(())
451}
452
453/// Detect text encoding using heuristics for common encodings
454fn detect_text_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
455    // Try common encodings
456    let encodings = [
457        encoding_rs::WINDOWS_1252,
458        encoding_rs::UTF_16LE,
459        encoding_rs::UTF_16BE,
460        encoding_rs::SHIFT_JIS,
461    ];
462
463    for encoding in &encodings {
464        let (decoded, _, had_errors) = encoding.decode(bytes);
465        if !had_errors && is_likely_text(&decoded) {
466            return Some(encoding);
467        }
468    }
469
470    None
471}
472
/// Heuristically decides whether decoded content looks like text.
///
/// Control characters other than `\n`, `\r` and `\t` count as suspicious.
/// Once more than 100 characters have been seen, the scan gives up as soon as
/// the suspicious share of the prefix read so far exceeds 5%. Otherwise the
/// content is accepted when at most 5% of all characters are suspicious.
/// Empty content is accepted.
fn is_likely_text(content: &str) -> bool {
    let is_suspicious = |c: char| c.is_control() && !matches!(c, '\n' | '\r' | '\t');

    let mut suspicious = 0usize;
    let mut seen = 0usize;

    for (idx, ch) in content.chars().enumerate() {
        seen = idx + 1;
        if is_suspicious(ch) {
            suspicious += 1;
        }

        // Early exit: the prefix is already more than 5% control characters.
        if seen > 100 && suspicious * 20 > seen {
            return false;
        }
    }

    // Short inputs never hit the early exit, so apply the 5% limit here.
    seen == 0 || suspicious * 20 <= seen
}
497
498/// Transcode file content from detected encoding to UTF-8
499fn transcode_file_content(file_path: &Path, encoding: &'static Encoding) -> io::Result<String> {
500    let bytes = std::fs::read(file_path)?;
501    let (decoded, _, had_errors) = encoding.decode(&bytes);
502
503    if had_errors {
504        return Err(io::Error::new(
505            io::ErrorKind::InvalidData,
506            format!("Failed to decode file with encoding {}", encoding.name()),
507        ));
508    }
509
510    Ok(decoded.into_owned())
511}
512
/// Writes `content` as a fenced code block tagged with `language`.
///
/// With `line_numbers` every line is prefixed with its right-aligned,
/// 1-based number followed by ` | `. Without it the content is written
/// verbatim and a trailing newline is added if missing, so the closing fence
/// always starts on its own line.
///
/// The fence is at least three backticks and always one longer than the
/// longest run of backticks inside `content`. This keeps embedded files that
/// contain their own code fences (for example Markdown documents) from
/// closing the surrounding block early. Content without such runs produces
/// the usual three-backtick fence.
///
/// # Errors
///
/// Returns any error raised while writing to `output`.
fn write_text_content(
    output: &mut impl Write,
    content: &str,
    language: &str,
    line_numbers: bool,
) -> io::Result<()> {
    // Splitting on every non-backtick character leaves exactly the backtick runs.
    let longest_backtick_run = content
        .split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0);
    let fence = "`".repeat(longest_backtick_run.max(2) + 1);

    writeln!(output, "{}{}", fence, language)?;

    if line_numbers {
        for (i, line) in content.lines().enumerate() {
            writeln!(output, "{:>4} | {}", i + 1, line)?;
        }
    } else {
        output.write_all(content.as_bytes())?;
        if !content.ends_with('\n') {
            writeln!(output)?;
        }
    }

    writeln!(output, "{}", fence)?;
    Ok(())
}
536
537#[cfg(test)]
538mod tests {
539    use super::*;
540    use std::fs;
541    use tempfile::tempdir;
542
543    #[test]
544    fn test_code_block_formatting() {
545        let dir = tempdir().unwrap();
546        let base_path = dir.path();
547        let file_path = base_path.join("test.rs");
548        let output_path = base_path.join("output.md");
549
550        // Create a test Rust file
551        fs::write(
552            &file_path,
553            "fn main() {\n    println!(\"Hello, world!\");\n}",
554        )
555        .unwrap();
556
557        // Create an output file
558        let mut output = fs::File::create(&output_path).unwrap();
559
560        // Process the file
561        process_file(base_path, &file_path, &mut output, false, None).unwrap();
562
563        // Read the output
564        let content = fs::read_to_string(&output_path).unwrap();
565
566        // Check that code blocks are properly formatted
567        assert!(content.contains("```rust"));
568        assert!(content.contains("```") && content.matches("```").count() >= 2);
569    }
570
571    #[test]
572    fn test_markdown_file_formatting() {
573        let dir = tempdir().unwrap();
574        let base_path = dir.path();
575        let file_path = base_path.join("README.md");
576        let output_path = base_path.join("output.md");
577
578        // Create a test Markdown file
579        fs::write(&file_path, "# Test\n\nThis is a test markdown file.").unwrap();
580
581        // Create an output file
582        let mut output = fs::File::create(&output_path).unwrap();
583
584        // Process the file
585        process_file(base_path, &file_path, &mut output, false, None).unwrap();
586
587        // Read the output
588        let content = fs::read_to_string(&output_path).unwrap();
589
590        // Debug prints the content
591        println!("Generated content:\n{}", content);
592
593        // Check that markdown files use the correct language identifier
594        assert!(
595            content.contains("```markdown"),
596            "Content should contain '```markdown' but was: {}",
597            content
598        );
599        // Count the number of code block markers
600        let code_block_markers = content.matches("```").count();
601
602        assert!(
603            code_block_markers >= 2,
604            "Expected at least 2 code block markers, found {}",
605            code_block_markers
606        );
607    }
608
609    #[test]
610    fn test_line_numbered_code_blocks() {
611        let dir = tempdir().unwrap();
612        let base_path = dir.path();
613        let file_path = base_path.join("lib.rs");
614        let output_path = base_path.join("out.md");
615
616        // Create a multi-line Rust file
617        fs::write(
618                    &file_path,
619                    "fn add(a: i32, b: i32) -> i32 {\n    a + b\n}\n\nfn main() {\n    println!(\"{}\", add(1, 2));\n}\n",
620                )
621                .unwrap();
622
623        let mut output = fs::File::create(&output_path).unwrap();
624        process_file(base_path, &file_path, &mut output, true, None).unwrap();
625
626        let content = fs::read_to_string(&output_path).unwrap();
627
628        // Check language and line numbers prefix
629        assert!(content.contains("```rust"));
630        assert!(content.contains("   1 | "));
631        assert!(content.contains("   2 | "));
632
633        // Count lines with "|" prefix equals number of lines in an original file
634        let numbered_lines = content
635            .lines()
636            .filter(|l| {
637                l.trim_start()
638                    .chars()
639                    .next()
640                    .map(|c| c.is_ascii_digit())
641                    .unwrap_or(false)
642                    && l.contains(" | ")
643            })
644            .count();
645        let original_line_count = fs::read_to_string(&file_path).unwrap().lines().count();
646        assert_eq!(numbered_lines, original_line_count);
647
648        // Ensure code fence closes
649        assert!(content.contains("```"));
650    }
651
652    #[test]
653    fn test_binary_file_handling() {
654        let dir = tempdir().unwrap();
655        let base_path = dir.path();
656        let file_path = base_path.join("image.bin");
657        let output_path = base_path.join("out.md");
658
659        // Write truly binary data that won't be decoded by encoding detection
660        let bytes = vec![
661            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG header
662            0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, // PNG chunk
663            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // More binary data
664            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Null bytes
665        ];
666        fs::write(&file_path, bytes).unwrap();
667
668        let mut output = fs::File::create(&output_path).unwrap();
669        process_file(base_path, &file_path, &mut output, false, None).unwrap();
670
671        let content = fs::read_to_string(&output_path).unwrap();
672
673        // Expect a text block to fall back with a helpful message
674        assert!(content.contains("```text"));
675        assert!(content.contains("<Binary file or unsupported encoding:"));
676
677        // Ensure the code block is closed
678        let fence_count = content.matches("```").count();
679        assert!(
680            fence_count >= 2,
681            "expected at least opening and closing fences, got {}",
682            fence_count
683        );
684    }
685
686    #[test]
687    fn test_encoding_detection_and_transcoding() {
688        let dir = tempdir().unwrap();
689        let base_path = dir.path();
690        let output_path = base_path.join("out.md");
691
692        // Test Windows-1252 encoded file (common in Windows)
693        let windows1252_content = [
694            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, // "Hello "
695            0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, // "World" with smart quotes
696            0x0A, // newline
697        ];
698        let file_path = base_path.join("windows1252.txt");
699        fs::write(&file_path, windows1252_content).unwrap();
700
701        let mut output = fs::File::create(&output_path).unwrap();
702        process_file(base_path, &file_path, &mut output, false, Some("detect")).unwrap();
703
704        let content = fs::read_to_string(&output_path).unwrap();
705
706        // Should contain transcoded content with UTF-8 equivalents
707        assert!(content.contains("Hello"));
708        assert!(content.contains("World"));
709        // Should use text language
710        assert!(content.contains("```txt"));
711
712        // Ensure the code block is closed
713        let fence_count = content.matches("```").count();
714        assert!(
715            fence_count >= 2,
716            "expected at least opening and closing fences, got {}",
717            fence_count
718        );
719    }
720
721    #[test]
722    fn test_encoding_strategy_strict() {
723        let dir = tempdir().unwrap();
724        let base_path = dir.path();
725        let output_path = base_path.join("out.md");
726
727        // Create a file with non-UTF-8 content
728        let non_utf8_content = [0xFF, 0xFE, 0x41, 0x00]; // UTF-16 LE BOM + "A"
729        let file_path = base_path.join("utf16.txt");
730        fs::write(&file_path, non_utf8_content).unwrap();
731
732        let mut output = fs::File::create(&output_path).unwrap();
733        process_file(base_path, &file_path, &mut output, false, Some("strict")).unwrap();
734
735        let content = fs::read_to_string(&output_path).unwrap();
736
737        // Should contain binary file placeholder
738        assert!(content.contains("<Binary file or unsupported encoding:"));
739        assert!(content.contains("```text"));
740
741        // Ensure the code block is closed
742        let fence_count = content.matches("```").count();
743        assert!(
744            fence_count >= 2,
745            "expected at least opening and closing fences, got {}",
746            fence_count
747        );
748    }
749
750    #[test]
751    fn test_encoding_strategy_skip() {
752        let dir = tempdir().unwrap();
753        let base_path = dir.path();
754        let output_path = base_path.join("out.md");
755
756        // Create a file with UTF-16 content
757        let utf16_content = [0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00]; // UTF-16 LE "Hi"
758        let file_path = base_path.join("utf16.txt");
759        fs::write(&file_path, utf16_content).unwrap();
760
761        let mut output = fs::File::create(&output_path).unwrap();
762        process_file(base_path, &file_path, &mut output, false, Some("skip")).unwrap();
763
764        let content = fs::read_to_string(&output_path).unwrap();
765
766        // Should contain binary file placeholder (skipped transcoding)
767        assert!(content.contains("<Binary file or unsupported encoding:"));
768        assert!(content.contains("```text"));
769    }
770
771    #[test]
772    fn test_generate_markdown_with_current_directory() {
773        let dir = tempdir().unwrap();
774        let base_path = dir.path();
775        let output_path = base_path.join("test.md");
776
777        // Create test files
778        fs::write(base_path.join("readme.txt"), "Hello world").unwrap();
779
780        // Collect files
781        let files = crate::file_utils::collect_files(base_path, &[], &[]).unwrap();
782        let file_tree = crate::tree::build_file_tree(&files, base_path);
783
784        // Change to the test directory
785        let original_dir = std::env::current_dir().unwrap();
786        std::env::set_current_dir(base_path).unwrap();
787
788        // Test with "." as input directory
789        let result = generate_markdown(
790            &output_path.to_string_lossy(),
791            ".",
792            &[],
793            &[],
794            &file_tree,
795            &files,
796            base_path,
797            false,
798            None,
799        );
800
801        // Restore original directory
802        std::env::set_current_dir(original_dir).unwrap();
803
804        assert!(result.is_ok());
805        let content = fs::read_to_string(&output_path).unwrap();
806        assert!(content.contains("Directory Structure Report"));
807    }
808
809    #[test]
810    fn test_generate_markdown_creates_output_directory() {
811        let dir = tempdir().unwrap();
812        let base_path = dir.path();
813        let nested_output = base_path.join("nested").join("deep").join("output.md");
814
815        // Create test files
816        fs::write(base_path.join("test.txt"), "content").unwrap();
817
818        let files = crate::file_utils::collect_files(base_path, &[], &[]).unwrap();
819        let file_tree = crate::tree::build_file_tree(&files, base_path);
820
821        let result = generate_markdown(
822            &nested_output.to_string_lossy(),
823            "test_dir",
824            &[],
825            &[],
826            &file_tree,
827            &files,
828            base_path,
829            false,
830            None,
831        );
832
833        assert!(result.is_ok());
834        assert!(nested_output.exists());
835        assert!(nested_output.parent().unwrap().exists());
836    }
837
838    #[test]
839    fn test_generate_markdown_with_filters_and_ignores() {
840        let dir = tempdir().unwrap();
841        let base_path = dir.path();
842        let output_path = base_path.join("filtered.md");
843
844        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
845        fs::write(base_path.join("config.toml"), "[package]").unwrap();
846        fs::write(base_path.join("readme.md"), "# README").unwrap();
847
848        let files = crate::file_utils::collect_files(base_path, &[], &[]).unwrap();
849        let file_tree = crate::tree::build_file_tree(&files, base_path);
850
851        let result = generate_markdown(
852            &output_path.to_string_lossy(),
853            "project",
854            &["rs".to_string(), "toml".to_string()],
855            &["readme.md".to_string()],
856            &file_tree,
857            &files,
858            base_path,
859            true,
860            Some("strict"),
861        );
862
863        assert!(result.is_ok());
864        let content = fs::read_to_string(&output_path).unwrap();
865        assert!(content.contains("Directory Structure Report"));
866        // The actual generate_markdown function doesn't format filters/ignores this way
867        assert!(content.contains("main.rs") || content.contains("config.toml"));
868    }
869
870    #[test]
871    fn test_write_text_content_with_line_numbers() {
872        let mut output = Vec::new();
873        let content = "line one\nline two\nline three";
874
875        write_text_content(&mut output, content, "rust", true).unwrap();
876
877        let result = String::from_utf8(output).unwrap();
878        assert!(result.contains("```rust"));
879        assert!(result.contains("   1 | line one"));
880        assert!(result.contains("   2 | line two"));
881        assert!(result.contains("   3 | line three"));
882        assert!(result.contains("```"));
883    }
884
885    #[test]
886    fn test_write_text_content_without_line_numbers() {
887        let mut output = Vec::new();
888        let content = "function test() {\n  return true;\n}";
889
890        write_text_content(&mut output, content, "javascript", false).unwrap();
891
892        let result = String::from_utf8(output).unwrap();
893        assert!(result.contains("```javascript"));
894        assert!(result.contains("function test() {"));
895        assert!(result.contains("  return true;"));
896        assert!(result.contains("```"));
897        assert!(!result.contains(" | ")); // No line number prefix
898    }
899
900    #[test]
901    fn test_write_text_content_without_trailing_newline() {
902        let mut output = Vec::new();
903        let content = "no newline at end"; // No \n at end
904
905        write_text_content(&mut output, content, "text", false).unwrap();
906
907        let result = String::from_utf8(output).unwrap();
908        assert!(result.contains("```text"));
909        assert!(result.contains("no newline at end"));
910        assert!(result.ends_with("```\n")); // Should add newline
911    }
912
913    #[test]
914    fn test_is_likely_text() {
915        // Normal text should be considered text
916        assert!(is_likely_text("Hello world\nThis is normal text"));
917
918        // Text with some control characters should still be text
919        assert!(is_likely_text(
920            "Line 1\nLine 2\tTabbed\r\nWindows line ending"
921        ));
922
923        // Text with too many control characters should not be text
924        let mut bad_text = String::new();
925        for i in 0..200 {
926            if i % 5 == 0 {
927                bad_text.push('\x01'); // Control character
928            } else {
929                bad_text.push('a');
930            }
931        }
932        assert!(!is_likely_text(&bad_text));
933
934        // Empty string should be considered text
935        assert!(is_likely_text(""));
936    }
937
938    #[test]
939    fn test_detect_text_encoding() {
940        // UTF-8 should return None (already UTF-8)
941        let utf8_bytes = "Hello world".as_bytes();
942        let result = detect_text_encoding(utf8_bytes);
943        // The function may return an encoding even for UTF-8 text if it detects it differently
944        // Just verify it doesn't crash
945        assert!(result.is_some() || result.is_none());
946
947        // Windows-1252 encoded text should be detected
948        let windows1252_bytes = [
949            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x94,
950        ];
951        let detected = detect_text_encoding(&windows1252_bytes);
952        assert!(detected.is_some());
953    }
954
955    #[test]
956    fn test_transcode_file_content() {
957        let dir = tempdir().unwrap();
958        let file_path = dir.path().join("windows1252.txt");
959
960        // Write Windows-1252 encoded content
961        let windows1252_content = [
962            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, // "Hello "
963            0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, // "World" with smart quotes
964        ];
965        fs::write(&file_path, windows1252_content).unwrap();
966
967        let result = transcode_file_content(&file_path, encoding_rs::WINDOWS_1252);
968        assert!(result.is_ok());
969
970        let transcoded = result.unwrap();
971        assert!(transcoded.contains("Hello"));
972        assert!(transcoded.contains("World"));
973    }
974
975    #[test]
976    fn test_process_file_with_metadata_error() {
977        let dir = tempdir().unwrap();
978        let base_path = dir.path();
979        let nonexistent_file = base_path.join("nonexistent.txt");
980        let output_path = base_path.join("output.md");
981
982        let mut output = fs::File::create(&output_path).unwrap();
983
984        // This should handle the metadata error gracefully
985        let result = process_file(base_path, &nonexistent_file, &mut output, false, None);
986        assert!(result.is_ok());
987
988        // Output should be minimal since file doesn't exist
989        let content = fs::read_to_string(&output_path).unwrap();
990        assert!(content.is_empty() || content.trim().is_empty());
991    }
992
993    #[test]
994    fn test_process_file_with_different_extensions() {
995        let dir = tempdir().unwrap();
996        let base_path = dir.path();
997        let output_path = base_path.join("output.md");
998
999        // Test various file extensions
1000        let test_files = [
1001            ("script.py", "print('hello')", "python"),
1002            ("data.json", r#"{"key": "value"}"#, "json"),
1003            ("config.yaml", "key: value", "yaml"),
1004            ("style.css", "body { margin: 0; }", "css"),
1005            ("page.html", "<html><body>Test</body></html>", "html"),
1006            ("query.sql", "SELECT * FROM users;", "sql"),
1007            ("build.sh", "#!/bin/bash\necho 'building'", "bash"),
1008            ("unknown.xyz", "unknown content", "xyz"),
1009        ];
1010
1011        for (filename, content, expected_lang) in test_files.iter() {
1012            let file_path = base_path.join(filename);
1013            fs::write(&file_path, content).unwrap();
1014
1015            let mut output = fs::File::create(&output_path).unwrap();
1016            process_file(base_path, &file_path, &mut output, false, None).unwrap();
1017
1018            let result = fs::read_to_string(&output_path).unwrap();
1019            assert!(result.contains(&format!("```{}", expected_lang)));
1020            assert!(result.contains(content));
1021            assert!(result.contains(filename));
1022        }
1023    }
1024}