Skip to main content

context_builder/
markdown.rs

1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9use encoding_rs::{Encoding, UTF_8};
10
11#[cfg(feature = "parallel")]
12use crossbeam_channel::{Receiver, Sender, bounded};
13#[cfg(feature = "parallel")]
14use std::thread;
15
/// Options that control the tree-sitter driven parts of the report.
///
/// The derived `Default` leaves both flags off and both string options empty.
#[derive(Clone, Debug, Default)]
pub struct TreeSitterConfig {
    /// When set, emit only signatures (function/type declarations) rather than
    /// the full file content.
    pub signatures: bool,
    /// When set, add a per-file structure summary (counts of functions,
    /// structs, and so on).
    pub structure: bool,
    /// Truncation mode. `"smart"` selects truncation on AST boundaries; every
    /// other value selects byte truncation.
    pub truncate: String,
    /// Visibility filter, one of `"public"`, `"private"`, or `"all"`.
    pub visibility: String,
}
28
29/// Generates the final Markdown file.
30#[allow(clippy::too_many_arguments, unused_variables)]
31pub fn generate_markdown(
32    output_path: &str,
33    input_dir: &str,
34    filters: &[String],
35    ignores: &[String],
36    file_tree: &FileTree,
37    files: &[DirEntry],
38    base_path: &Path,
39    line_numbers: bool,
40    encoding_strategy: Option<&str>,
41    max_tokens: Option<usize>,
42    ts_config: &TreeSitterConfig,
43) -> io::Result<()> {
44    if let Some(parent) = Path::new(output_path).parent()
45        && !parent.exists()
46    {
47        fs::create_dir_all(parent)?;
48    }
49
50    let mut output = fs::File::create(output_path)?;
51
52    let input_dir_name = if input_dir == "." {
53        let current_dir = std::env::current_dir()?;
54        current_dir
55            .file_name()
56            .and_then(|n| n.to_str())
57            .unwrap_or_else(|| current_dir.to_str().unwrap_or("project"))
58            .to_string()
59    } else {
60        input_dir.to_string()
61    };
62
63    // --- Header --- //
64    writeln!(output, "# Directory Structure Report\n")?;
65
66    if !filters.is_empty() {
67        writeln!(
68            output,
69            "This document contains files from the `{}` directory with extensions: {}",
70            input_dir_name,
71            filters.join(", ")
72        )?;
73    } else {
74        writeln!(
75            output,
76            "This document contains all files from the `{}` directory, optimized for LLM consumption.",
77            input_dir_name
78        )?;
79    }
80
81    if !ignores.is_empty() {
82        writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
83    }
84
85    // Deterministic content hash (enables LLM prompt caching across runs)
86    // Uses xxh3 over file content bytes — stable across Rust versions and machines.
87    // Previous implementation hashed mtime (broken by git checkout, cp, etc.)
88    let mut content_hasher = xxhash_rust::xxh3::Xxh3::new();
89    for entry in files {
90        // Hash relative unix-style path for cross-OS determinism.
91        // Using absolute or OS-native paths would produce different hashes
92        // on different machines or operating systems.
93        let rel_path = entry.path().strip_prefix(base_path).unwrap_or(entry.path());
94        let normalized = rel_path.to_string_lossy().replace('\\', "/");
95        content_hasher.update(normalized.as_bytes());
96        // Null delimiter prevents collision: path="a" content="bc" vs path="ab" content="c"
97        content_hasher.update(b"\0");
98        // Hash actual file content (not mtime!) for determinism
99        if let Ok(bytes) = std::fs::read(entry.path()) {
100            content_hasher.update(&bytes);
101        }
102        content_hasher.update(b"\0");
103    }
104    writeln!(output, "Content hash: {:016x}", content_hasher.digest())?;
105    writeln!(output)?;
106
107    // --- File Tree --- //
108
109    writeln!(output, "## File Tree Structure\n")?;
110
111    write_tree_to_file(&mut output, file_tree, 0)?;
112
113    writeln!(output)?;
114
115    // (No '## Files' heading here; it will be injected later only once during final composition)
116    // (Diff section will be conditionally inserted later by the auto_diff logic in lib.rs)
117
118    #[cfg(feature = "parallel")]
119    {
120        use rayon::prelude::*;
121
122        // Create a bounded channel for ordered chunks
123        type ChunkResult = (usize, io::Result<Vec<u8>>);
124        let (sender, receiver): (Sender<ChunkResult>, Receiver<ChunkResult>) =
125            bounded(num_cpus::get() * 2); // Buffer size based on CPU count
126
127        let writer_handle = {
128            let mut output = output;
129            let total_files = files.len();
130            let budget = max_tokens;
131
132            thread::spawn(move || -> io::Result<()> {
133                let mut completed_chunks = std::collections::BTreeMap::new();
134                let mut next_index = 0;
135                let mut errors = Vec::new();
136                let mut tokens_used: usize = 0;
137                let mut budget_exceeded = false;
138
139                // Receive chunks and write them in order
140                while next_index < total_files {
141                    match receiver.recv() {
142                        Ok((index, chunk_result)) => {
143                            completed_chunks.insert(index, chunk_result);
144
145                            // Write all consecutive chunks starting from next_index
146                            while let Some(chunk_result) = completed_chunks.remove(&next_index) {
147                                if budget_exceeded {
148                                    // Already over budget — skip remaining chunks
149                                    next_index += 1;
150                                    continue;
151                                }
152
153                                match chunk_result {
154                                    Ok(buf) => {
155                                        // Estimate tokens for this chunk (~4 bytes per token)
156                                        let chunk_tokens = buf.len() / 4;
157
158                                        if let Some(max) = budget
159                                            && tokens_used + chunk_tokens > max
160                                            && tokens_used > 0
161                                        {
162                                            let remaining = total_files - next_index;
163                                            let notice = format!(
164                                                "---\n\n_⚠️ Token budget ({}) reached. {} remaining files omitted._\n\n",
165                                                max, remaining
166                                            );
167                                            if let Err(e) = output.write_all(notice.as_bytes()) {
168                                                errors.push(format!(
169                                                    "Failed to write truncation notice: {}",
170                                                    e
171                                                ));
172                                            }
173                                            budget_exceeded = true;
174                                            next_index += 1;
175                                            continue;
176                                        }
177
178                                        tokens_used += chunk_tokens;
179                                        if let Err(e) = output.write_all(&buf) {
180                                            errors.push(format!(
181                                                "Failed to write output for file index {}: {}",
182                                                next_index, e
183                                            ));
184                                        }
185                                    }
186                                    Err(e) => {
187                                        errors.push(format!(
188                                            "Failed to process file index {}: {}",
189                                            next_index, e
190                                        ));
191                                    }
192                                }
193                                next_index += 1;
194                            }
195                        }
196                        Err(_) => break, // Channel closed
197                    }
198                }
199
200                if !errors.is_empty() {
201                    error!(
202                        "Encountered {} errors during parallel processing:",
203                        errors.len()
204                    );
205                    for err in &errors {
206                        error!("  {}", err);
207                    }
208                    return Err(std::io::Error::other(format!(
209                        "Failed to process {} files: {}",
210                        errors.len(),
211                        errors.join("; ")
212                    )));
213                }
214
215                Ok(())
216            })
217        };
218
219        // Process files in parallel and send results to writer
220        let ts_config_clone = ts_config.clone();
221        files.par_iter().enumerate().for_each(|(index, entry)| {
222            let mut buf = Vec::new();
223            let result = process_file(
224                base_path,
225                entry.path(),
226                &mut buf,
227                line_numbers,
228                encoding_strategy,
229                &ts_config_clone,
230            )
231            .map(|_| buf);
232
233            // Send result to writer thread (ignore send errors - channel might be closed)
234            let _ = sender.send((index, result));
235        });
236
237        // Close the sender to signal completion
238        drop(sender);
239
240        // Wait for writer thread to complete and propagate any errors
241        writer_handle
242            .join()
243            .map_err(|_| std::io::Error::other("Writer thread panicked"))??;
244    }
245
246    #[cfg(not(feature = "parallel"))]
247    {
248        let mut tokens_used: usize = 0;
249
250        for (idx, entry) in files.iter().enumerate() {
251            // Estimate tokens for this file (~4 bytes per token)
252            let file_size = std::fs::metadata(entry.path())
253                .map(|m| m.len())
254                .unwrap_or(0);
255            let estimated_file_tokens = (file_size as usize) / 4;
256
257            if let Some(budget) = max_tokens {
258                if tokens_used + estimated_file_tokens > budget && tokens_used > 0 {
259                    let remaining = files.len() - idx;
260                    writeln!(output, "---\n")?;
261                    writeln!(
262                        output,
263                        "_⚠️ Token budget ({}) reached. {} remaining files omitted._\n",
264                        budget, remaining
265                    )?;
266                    break;
267                }
268            }
269
270            tokens_used += estimated_file_tokens;
271            process_file(
272                base_path,
273                entry.path(),
274                &mut output,
275                line_numbers,
276                encoding_strategy,
277                ts_config,
278            )?;
279        }
280    }
281
282    Ok(())
283}
284
285/// Processes a single file and writes its content to the output.
286pub fn process_file(
287    base_path: &Path,
288    file_path: &Path,
289    output: &mut impl Write,
290    line_numbers: bool,
291    encoding_strategy: Option<&str>,
292    ts_config: &TreeSitterConfig,
293) -> io::Result<()> {
294    let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
295    info!("Processing file: {}", relative_path.display());
296
297    let metadata = match fs::metadata(file_path) {
298        Ok(meta) => meta,
299        Err(e) => {
300            error!(
301                "Failed to get metadata for {}: {}",
302                relative_path.display(),
303                e
304            );
305            return Ok(());
306        }
307    };
308
309    let modified_time = metadata
310        .modified()
311        .ok()
312        .map(|time| {
313            let system_time: chrono::DateTime<Utc> = time.into();
314            system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
315        })
316        .unwrap_or_else(|| "Unknown".to_string());
317
318    writeln!(output)?;
319    writeln!(output, "### File: `{}`", relative_path.display())?;
320
321    writeln!(output)?;
322
323    writeln!(output, "- Size: {} bytes", metadata.len())?;
324    writeln!(output, "- Modified: {}", modified_time)?;
325    writeln!(output)?;
326
327    // --- File Content --- //
328    let extension = file_path
329        .extension()
330        .and_then(|s| s.to_str())
331        .unwrap_or("text");
332    let language = match extension {
333        "rs" => "rust",
334        "js" => "javascript",
335        "ts" => "typescript",
336        "jsx" => "jsx",
337        "tsx" => "tsx",
338        "json" => "json",
339        "toml" => "toml",
340        "md" => "markdown",
341        "yaml" | "yml" => "yaml",
342        "html" => "html",
343        "css" => "css",
344        "py" => "python",
345        "java" => "java",
346        "cpp" => "cpp",
347        "c" => "c",
348        "h" => "c",
349        "hpp" => "cpp",
350        "sql" => "sql",
351        "sh" => "bash",
352        "xml" => "xml",
353        "lock" => "toml",
354        _ => extension,
355    };
356
357    // Enhanced binary file handling with encoding detection and transcoding
358    match fs::File::open(file_path) {
359        Ok(mut file) => {
360            let mut sniff = [0u8; 8192];
361            let n = match file.read(&mut sniff) {
362                Ok(n) => n,
363                Err(e) => {
364                    warn!(
365                        "Could not read file {}: {}. Skipping content.",
366                        relative_path.display(),
367                        e
368                    );
369
370                    writeln!(output, "```text")?;
371
372                    writeln!(
373                        output,
374                        "<Could not read file content (e.g., binary file or permission error)>"
375                    )?;
376
377                    writeln!(output, "```")?;
378
379                    return Ok(());
380                }
381            };
382            let slice = &sniff[..n];
383
384            // Find a valid UTF-8 boundary by backtracking up to 3 bytes.
385            // If the sniff buffer cuts a multi-byte char (e.g., emoji at byte 8191),
386            // from_utf8 would falsely classify the file as non-UTF-8.
387            let check_len = if n == sniff.len() {
388                // Buffer is full — may have split a multi-byte char at the end
389                let mut end = n;
390                while end > 0 && end > n.saturating_sub(4) && sniff[end - 1] & 0xC0 == 0x80 {
391                    end -= 1; // skip continuation bytes
392                }
393                // If we landed on a leading byte, check if the sequence is complete
394                if end > 0 && end < n {
395                    let leading = sniff[end - 1];
396                    let expected_len = if leading & 0xE0 == 0xC0 {
397                        2
398                    } else if leading & 0xF0 == 0xE0 {
399                        3
400                    } else if leading & 0xF8 == 0xF0 {
401                        4
402                    } else {
403                        1
404                    };
405                    if end - 1 + expected_len > n {
406                        end - 1 // incomplete char — exclude the leading byte too
407                    } else {
408                        n
409                    }
410                } else {
411                    n
412                }
413            } else {
414                n // didn't fill the buffer, so no boundary issue
415            };
416
417            // First check if it's valid UTF-8
418            let is_utf8 = std::str::from_utf8(&sniff[..check_len]).is_ok();
419
420            if is_utf8 && !slice.contains(&0) {
421                // Valid UTF-8 text file - proceed normally
422            } else {
423                // Try encoding detection for non-UTF-8 files
424                // If it's not UTF-8, try to detect the encoding
425                let (encoding, _consumed) =
426                    encoding_rs::Encoding::for_bom(slice).unwrap_or((encoding_rs::UTF_8, 0));
427
428                // If it's not UTF-8, try to detect the encoding
429                let detected_encoding = if encoding == UTF_8 {
430                    // Use chardet-like detection for common encodings
431                    detect_text_encoding(slice)
432                } else {
433                    Some(encoding)
434                };
435
436                match detected_encoding {
437                    Some(enc) if enc != UTF_8 => {
438                        let strategy = encoding_strategy.unwrap_or("detect");
439                        match strategy {
440                            "strict" | "skip" => {
441                                // Skip files with non-UTF-8 encoding
442                                warn!(
443                                    "Skipping non-UTF-8 file {} (encoding: {}, strategy: {})",
444                                    relative_path.display(),
445                                    enc.name(),
446                                    strategy
447                                );
448                            }
449                            _ => {
450                                // Default "detect" strategy: attempt to transcode
451                                match transcode_file_content(file_path, enc) {
452                                    Ok(transcoded_content) => {
453                                        info!(
454                                            "Successfully transcoded {} from {} to UTF-8",
455                                            relative_path.display(),
456                                            enc.name()
457                                        );
458                                        write_text_content(
459                                            output,
460                                            &transcoded_content,
461                                            language,
462                                            line_numbers,
463                                        )?;
464                                        return Ok(());
465                                    }
466                                    Err(e) => {
467                                        warn!(
468                                            "Failed to transcode {} from {}: {}. Treating as binary.",
469                                            relative_path.display(),
470                                            enc.name(),
471                                            e
472                                        );
473                                    }
474                                }
475                            }
476                        }
477                    }
478                    _ => {
479                        // Check if it's likely binary (contains null bytes)
480                        if slice.contains(&0) {
481                            warn!(
482                                "Detected binary file {} (contains null bytes). Skipping content.",
483                                relative_path.display()
484                            );
485                        } else {
486                            warn!(
487                                "Could not determine encoding for {}. Treating as binary.",
488                                relative_path.display()
489                            );
490                        }
491                    }
492                }
493
494                // Fallback to binary file placeholder
495                writeln!(output, "```text")?;
496                writeln!(
497                    output,
498                    "<Binary file or unsupported encoding: {} bytes>",
499                    metadata.len()
500                )?;
501                writeln!(output, "```")?;
502                return Ok(());
503            }
504
505            // Reset cursor and stream the content
506            if let Err(e) = file.seek(SeekFrom::Start(0)) {
507                warn!(
508                    "Could not reset file cursor for {}: {}. Skipping content.",
509                    relative_path.display(),
510                    e
511                );
512                writeln!(output, "```text")?;
513                writeln!(
514                    output,
515                    "<Could not read file content (e.g., binary file or permission error)>"
516                )?;
517                writeln!(output, "```")?;
518                return Ok(());
519            }
520
521            // Stream UTF-8 content
522            let content = match std::fs::read_to_string(file_path) {
523                Ok(content) => content,
524                Err(e) => {
525                    warn!(
526                        "Error reading file {}: {}. Output may be truncated.",
527                        relative_path.display(),
528                        e
529                    );
530                    writeln!(output, "```text")?;
531                    writeln!(output, "<Error reading file content>")?;
532                    writeln!(output, "```")?;
533                    return Ok(());
534                }
535            };
536            // When --signatures is active, replace file content with signatures-only output
537            // ONLY for extensions that tree-sitter actually supports. Non-code files
538            // (Cargo.toml, README.md, .yaml, etc.) must always show full content.
539            let signatures_only =
540                ts_config.signatures && crate::tree_sitter::is_supported_extension(extension);
541
542            if !signatures_only {
543                // Note: Smart truncation (`truncate: "smart"`) indicates AST-boundary
544                // truncation should be preferred when content needs truncating.
545                // Without a per-file max_tokens budget, no truncation is applied.
546                // The flag is stored for future use when per-file token limits are implemented.
547                write_text_content(output, &content, language, line_numbers)?;
548            }
549
550            // Tree-sitter enrichment: signatures and/or structure
551            write_tree_sitter_enrichment(output, &content, extension, ts_config)?;
552        }
553        Err(e) => {
554            warn!(
555                "Could not open file {}: {}. Skipping content.",
556                relative_path.display(),
557                e
558            );
559            writeln!(output, "```text")?;
560            writeln!(
561                output,
562                "<Could not read file content (e.g., binary file or permission error)>"
563            )?;
564            writeln!(output, "```")?;
565        }
566    }
567
568    Ok(())
569}
570
571/// Write tree-sitter enrichment (signatures, structure) after file content.
572#[allow(unused_variables)]
573pub fn write_tree_sitter_enrichment(
574    output: &mut impl Write,
575    content: &str,
576    extension: &str,
577    ts_config: &TreeSitterConfig,
578) -> io::Result<()> {
579    if !ts_config.signatures && !ts_config.structure {
580        return Ok(());
581    }
582
583    #[cfg(feature = "tree-sitter-base")]
584    {
585        use crate::tree_sitter::language_support::Visibility;
586
587        let vis_filter: Visibility = ts_config.visibility.parse().unwrap_or(Visibility::All);
588
589        if ts_config.structure
590            && let Some(structure) =
591                crate::tree_sitter::extract_structure_for_file(content, extension)
592        {
593            let summary = crate::tree_sitter::structure::format_structure_as_markdown(&structure);
594            if !summary.is_empty() {
595                writeln!(output)?;
596                write!(output, "{}", summary)?;
597            }
598        }
599
600        if ts_config.signatures
601            && let Some(signatures) =
602                crate::tree_sitter::extract_signatures_for_file(content, extension, vis_filter)
603            && !signatures.is_empty()
604        {
605            let language = match extension {
606                "rs" => "rust",
607                "js" | "mjs" | "cjs" => "javascript",
608                "ts" | "tsx" | "mts" | "cts" => "typescript",
609                "py" | "pyw" => "python",
610                "go" => "go",
611                "java" => "java",
612                "c" | "h" => "c",
613                "cpp" | "cxx" | "cc" | "hpp" | "hxx" | "hh" => "cpp",
614                _ => extension,
615            };
616            writeln!(output)?;
617            writeln!(output, "**Signatures:**")?;
618            writeln!(output)?;
619            let formatted = crate::tree_sitter::signatures::format_signatures_as_markdown(
620                &signatures,
621                language,
622            );
623            write!(output, "{}", formatted)?;
624        }
625    }
626
627    #[cfg(not(feature = "tree-sitter-base"))]
628    {
629        // Tree-sitter not compiled in — flags have no effect.
630        // Warning is printed once at startup in lib.rs.
631    }
632
633    Ok(())
634}
635
636/// Detect text encoding using heuristics for common encodings
637fn detect_text_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
638    // Try common encodings
639    let encodings = [
640        encoding_rs::WINDOWS_1252,
641        encoding_rs::UTF_16LE,
642        encoding_rs::UTF_16BE,
643        encoding_rs::SHIFT_JIS,
644    ];
645
646    for encoding in &encodings {
647        let (decoded, _, had_errors) = encoding.decode(bytes);
648        if !had_errors && is_likely_text(&decoded) {
649            return Some(encoding);
650        }
651    }
652
653    None
654}
655
/// Decides whether decoded content looks like text.
///
/// Control characters other than `\n`, `\r` and `\t` count against the
/// content. The result is `false` as soon as, past the first 100 characters,
/// they make up more than 5% of what has been scanned so far; otherwise the
/// same 5% limit is applied to the whole content. Empty content is text.
fn is_likely_text(content: &str) -> bool {
    // Running (scanned, suspicious) tallies; `None` signals the early bail-out.
    let tallies = content
        .chars()
        .try_fold((0usize, 0usize), |(scanned, suspicious), ch| {
            let scanned = scanned + 1;
            let suspicious = if ch.is_control() && !matches!(ch, '\n' | '\r' | '\t') {
                suspicious + 1
            } else {
                suspicious
            };

            // `x * 20 > total` is the integer form of "more than 5%".
            if scanned > 100 && suspicious * 20 > scanned {
                None
            } else {
                Some((scanned, suspicious))
            }
        });

    match tallies {
        None => false,
        Some((0, _)) => true,
        Some((scanned, suspicious)) => suspicious * 20 <= scanned,
    }
}
680
681/// Transcode file content from detected encoding to UTF-8
682fn transcode_file_content(file_path: &Path, encoding: &'static Encoding) -> io::Result<String> {
683    let bytes = std::fs::read(file_path)?;
684    let (decoded, _, had_errors) = encoding.decode(&bytes);
685
686    if had_errors {
687        return Err(io::Error::new(
688            io::ErrorKind::InvalidData,
689            format!("Failed to decode file with encoding {}", encoding.name()),
690        ));
691    }
692
693    Ok(decoded.into_owned())
694}
695
/// Writes `content` inside a Markdown code fence tagged with `language`.
///
/// With `line_numbers` set, every line is prefixed with its 1-based number,
/// right-aligned to four columns and followed by ` | `. Otherwise the content
/// is written verbatim and a trailing newline is added if it lacks one, so the
/// closing fence always starts on its own line.
fn write_text_content(
    output: &mut impl Write,
    content: &str,
    language: &str,
    line_numbers: bool,
) -> io::Result<()> {
    writeln!(output, "```{}", language)?;

    if line_numbers {
        for (number, text) in (1usize..).zip(content.lines()) {
            writeln!(output, "{:>4} | {}", number, text)?;
        }
    } else {
        output.write_all(content.as_bytes())?;
        if !content.ends_with('\n') {
            output.write_all(b"\n")?;
        }
    }

    writeln!(output, "```")
}
719
720#[cfg(test)]
721mod tests {
722    use super::*;
723    use serial_test::serial;
724    use std::fs;
725    use tempfile::tempdir;
726
727    #[test]
728    fn test_code_block_formatting() {
729        let dir = tempdir().unwrap();
730        let base_path = dir.path();
731        let file_path = base_path.join("test.rs");
732        let output_path = base_path.join("output.md");
733
734        // Create a test Rust file
735        fs::write(
736            &file_path,
737            "fn main() {\n    println!(\"Hello, world!\");\n}",
738        )
739        .unwrap();
740
741        // Create an output file
742        let mut output = fs::File::create(&output_path).unwrap();
743
744        // Process the file
745        process_file(
746            base_path,
747            &file_path,
748            &mut output,
749            false,
750            None,
751            &TreeSitterConfig::default(),
752        )
753        .unwrap();
754
755        // Read the output
756        let content = fs::read_to_string(&output_path).unwrap();
757
758        // Check that code blocks are properly formatted
759        assert!(content.contains("```rust"));
760        assert!(content.contains("```") && content.matches("```").count() >= 2);
761    }
762
763    #[test]
764    fn test_markdown_file_formatting() {
765        let dir = tempdir().unwrap();
766        let base_path = dir.path();
767        let file_path = base_path.join("README.md");
768        let output_path = base_path.join("output.md");
769
770        // Create a test Markdown file
771        fs::write(&file_path, "# Test\n\nThis is a test markdown file.").unwrap();
772
773        // Create an output file
774        let mut output = fs::File::create(&output_path).unwrap();
775
776        // Process the file
777        process_file(
778            base_path,
779            &file_path,
780            &mut output,
781            false,
782            None,
783            &TreeSitterConfig::default(),
784        )
785        .unwrap();
786
787        // Read the output
788        let content = fs::read_to_string(&output_path).unwrap();
789
790        // Debug prints the content
791        println!("Generated content:\n{}", content);
792
793        // Check that markdown files use the correct language identifier
794        assert!(
795            content.contains("```markdown"),
796            "Content should contain '```markdown' but was: {}",
797            content
798        );
799        // Count the number of code block markers
800        let code_block_markers = content.matches("```").count();
801
802        assert!(
803            code_block_markers >= 2,
804            "Expected at least 2 code block markers, found {}",
805            code_block_markers
806        );
807    }
808
809    #[test]
810    fn test_line_numbered_code_blocks() {
811        let dir = tempdir().unwrap();
812        let base_path = dir.path();
813        let file_path = base_path.join("lib.rs");
814        let output_path = base_path.join("out.md");
815
816        // Create a multi-line Rust file
817        fs::write(
818                    &file_path,
819                    "fn add(a: i32, b: i32) -> i32 {\n    a + b\n}\n\nfn main() {\n    println!(\"{}\", add(1, 2));\n}\n",
820                )
821                .unwrap();
822
823        let mut output = fs::File::create(&output_path).unwrap();
824        process_file(
825            base_path,
826            &file_path,
827            &mut output,
828            true,
829            None,
830            &TreeSitterConfig::default(),
831        )
832        .unwrap();
833
834        let content = fs::read_to_string(&output_path).unwrap();
835
836        // Check language and line numbers prefix
837        assert!(content.contains("```rust"));
838        assert!(content.contains("   1 | "));
839        assert!(content.contains("   2 | "));
840
841        // Count lines with "|" prefix equals number of lines in an original file
842        let numbered_lines = content
843            .lines()
844            .filter(|l| {
845                l.trim_start()
846                    .chars()
847                    .next()
848                    .map(|c| c.is_ascii_digit())
849                    .unwrap_or(false)
850                    && l.contains(" | ")
851            })
852            .count();
853        let original_line_count = fs::read_to_string(&file_path).unwrap().lines().count();
854        assert_eq!(numbered_lines, original_line_count);
855
856        // Ensure code fence closes
857        assert!(content.contains("```"));
858    }
859
860    #[test]
861    fn test_binary_file_handling() {
862        let dir = tempdir().unwrap();
863        let base_path = dir.path();
864        let file_path = base_path.join("image.bin");
865        let output_path = base_path.join("out.md");
866
867        // Write truly binary data that won't be decoded by encoding detection
868        let bytes = vec![
869            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG header
870            0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, // PNG chunk
871            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // More binary data
872            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Null bytes
873        ];
874        fs::write(&file_path, bytes).unwrap();
875
876        let mut output = fs::File::create(&output_path).unwrap();
877        process_file(
878            base_path,
879            &file_path,
880            &mut output,
881            false,
882            None,
883            &TreeSitterConfig::default(),
884        )
885        .unwrap();
886
887        let content = fs::read_to_string(&output_path).unwrap();
888
889        // Expect a text block to fall back with a helpful message
890        assert!(content.contains("```text"));
891        assert!(content.contains("<Binary file or unsupported encoding:"));
892
893        // Ensure the code block is closed
894        let fence_count = content.matches("```").count();
895        assert!(
896            fence_count >= 2,
897            "expected at least opening and closing fences, got {}",
898            fence_count
899        );
900    }
901
902    #[test]
903    fn test_encoding_detection_and_transcoding() {
904        let dir = tempdir().unwrap();
905        let base_path = dir.path();
906        let output_path = base_path.join("out.md");
907
908        // Test Windows-1252 encoded file (common in Windows)
909        let windows1252_content = [
910            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, // "Hello "
911            0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, // "World" with smart quotes
912            0x0A, // newline
913        ];
914        let file_path = base_path.join("windows1252.txt");
915        fs::write(&file_path, windows1252_content).unwrap();
916
917        let mut output = fs::File::create(&output_path).unwrap();
918        process_file(
919            base_path,
920            &file_path,
921            &mut output,
922            false,
923            Some("detect"),
924            &TreeSitterConfig::default(),
925        )
926        .unwrap();
927
928        let content = fs::read_to_string(&output_path).unwrap();
929
930        // Should contain transcoded content with UTF-8 equivalents
931        assert!(content.contains("Hello"));
932        assert!(content.contains("World"));
933        // Should use text language
934        assert!(content.contains("```txt"));
935
936        // Ensure the code block is closed
937        let fence_count = content.matches("```").count();
938        assert!(
939            fence_count >= 2,
940            "expected at least opening and closing fences, got {}",
941            fence_count
942        );
943    }
944
945    #[test]
946    fn test_encoding_strategy_strict() {
947        let dir = tempdir().unwrap();
948        let base_path = dir.path();
949        let output_path = base_path.join("out.md");
950
951        // Create a file with non-UTF-8 content
952        let non_utf8_content = [0xFF, 0xFE, 0x41, 0x00]; // UTF-16 LE BOM + "A"
953        let file_path = base_path.join("utf16.txt");
954        fs::write(&file_path, non_utf8_content).unwrap();
955
956        let mut output = fs::File::create(&output_path).unwrap();
957        process_file(
958            base_path,
959            &file_path,
960            &mut output,
961            false,
962            Some("strict"),
963            &TreeSitterConfig::default(),
964        )
965        .unwrap();
966
967        let content = fs::read_to_string(&output_path).unwrap();
968
969        // Should contain binary file placeholder
970        assert!(content.contains("<Binary file or unsupported encoding:"));
971        assert!(content.contains("```text"));
972
973        // Ensure the code block is closed
974        let fence_count = content.matches("```").count();
975        assert!(
976            fence_count >= 2,
977            "expected at least opening and closing fences, got {}",
978            fence_count
979        );
980    }
981
982    #[test]
983    fn test_encoding_strategy_skip() {
984        let dir = tempdir().unwrap();
985        let base_path = dir.path();
986        let output_path = base_path.join("out.md");
987
988        // Create a file with UTF-16 content
989        let utf16_content = [0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00]; // UTF-16 LE "Hi"
990        let file_path = base_path.join("utf16.txt");
991        fs::write(&file_path, utf16_content).unwrap();
992
993        let mut output = fs::File::create(&output_path).unwrap();
994        process_file(
995            base_path,
996            &file_path,
997            &mut output,
998            false,
999            Some("skip"),
1000            &TreeSitterConfig::default(),
1001        )
1002        .unwrap();
1003
1004        let content = fs::read_to_string(&output_path).unwrap();
1005
1006        // Should contain binary file placeholder (skipped transcoding)
1007        assert!(content.contains("<Binary file or unsupported encoding:"));
1008        assert!(content.contains("```text"));
1009    }
1010
1011    #[test]
1012    #[serial]
1013    fn test_generate_markdown_with_current_directory() {
1014        let dir = tempdir().unwrap();
1015        let base_path = dir.path();
1016        let output_path = base_path.join("test.md");
1017
1018        // Create test files
1019        fs::write(base_path.join("readme.txt"), "Hello world").unwrap();
1020
1021        // Collect files
1022        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
1023        let file_tree = crate::tree::build_file_tree(&files, base_path);
1024
1025        // Change to the test directory
1026        let original_dir = std::env::current_dir().unwrap();
1027        std::env::set_current_dir(base_path).unwrap();
1028
1029        // Test with "." as input directory
1030        let result = generate_markdown(
1031            &output_path.to_string_lossy(),
1032            ".",
1033            &[],
1034            &[],
1035            &file_tree,
1036            &files,
1037            base_path,
1038            false,
1039            None,
1040            None, // max_tokens
1041            &TreeSitterConfig::default(),
1042        );
1043
1044        // Restore original directory
1045        std::env::set_current_dir(original_dir).unwrap();
1046
1047        assert!(result.is_ok());
1048        let content = fs::read_to_string(&output_path).unwrap();
1049        assert!(content.contains("Directory Structure Report"));
1050    }
1051
1052    #[test]
1053    fn test_generate_markdown_creates_output_directory() {
1054        let dir = tempdir().unwrap();
1055        let base_path = dir.path();
1056        let nested_output = base_path.join("nested").join("deep").join("output.md");
1057
1058        // Create test files
1059        fs::write(base_path.join("test.txt"), "content").unwrap();
1060
1061        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
1062        let file_tree = crate::tree::build_file_tree(&files, base_path);
1063
1064        let result = generate_markdown(
1065            &nested_output.to_string_lossy(),
1066            "test_dir",
1067            &[],
1068            &[],
1069            &file_tree,
1070            &files,
1071            base_path,
1072            false,
1073            None,
1074            None, // max_tokens
1075            &TreeSitterConfig::default(),
1076        );
1077
1078        assert!(result.is_ok());
1079        assert!(nested_output.exists());
1080        assert!(nested_output.parent().unwrap().exists());
1081    }
1082
1083    #[test]
1084    fn test_generate_markdown_with_filters_and_ignores() {
1085        let dir = tempdir().unwrap();
1086        let base_path = dir.path();
1087        let output_path = base_path.join("filtered.md");
1088
1089        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
1090        fs::write(base_path.join("config.toml"), "[package]").unwrap();
1091        fs::write(base_path.join("readme.md"), "# README").unwrap();
1092
1093        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
1094        let file_tree = crate::tree::build_file_tree(&files, base_path);
1095
1096        let result = generate_markdown(
1097            &output_path.to_string_lossy(),
1098            "project",
1099            &["rs".to_string(), "toml".to_string()],
1100            &["readme.md".to_string()],
1101            &file_tree,
1102            &files,
1103            base_path,
1104            true,
1105            Some("strict"),
1106            None, // max_tokens
1107            &TreeSitterConfig::default(),
1108        );
1109
1110        assert!(result.is_ok());
1111        let content = fs::read_to_string(&output_path).unwrap();
1112        assert!(content.contains("Directory Structure Report"));
1113        // The actual generate_markdown function doesn't format filters/ignores this way
1114        assert!(content.contains("main.rs") || content.contains("config.toml"));
1115    }
1116
1117    #[test]
1118    fn test_write_text_content_with_line_numbers() {
1119        let mut output = Vec::new();
1120        let content = "line one\nline two\nline three";
1121
1122        write_text_content(&mut output, content, "rust", true).unwrap();
1123
1124        let result = String::from_utf8(output).unwrap();
1125        assert!(result.contains("```rust"));
1126        assert!(result.contains("   1 | line one"));
1127        assert!(result.contains("   2 | line two"));
1128        assert!(result.contains("   3 | line three"));
1129        assert!(result.contains("```"));
1130    }
1131
1132    #[test]
1133    fn test_write_text_content_without_line_numbers() {
1134        let mut output = Vec::new();
1135        let content = "function test() {\n  return true;\n}";
1136
1137        write_text_content(&mut output, content, "javascript", false).unwrap();
1138
1139        let result = String::from_utf8(output).unwrap();
1140        assert!(result.contains("```javascript"));
1141        assert!(result.contains("function test() {"));
1142        assert!(result.contains("  return true;"));
1143        assert!(result.contains("```"));
1144        assert!(!result.contains(" | ")); // No line number prefix
1145    }
1146
1147    #[test]
1148    fn test_write_text_content_without_trailing_newline() {
1149        let mut output = Vec::new();
1150        let content = "no newline at end"; // No \n at end
1151
1152        write_text_content(&mut output, content, "text", false).unwrap();
1153
1154        let result = String::from_utf8(output).unwrap();
1155        assert!(result.contains("```text"));
1156        assert!(result.contains("no newline at end"));
1157        assert!(result.ends_with("```\n")); // Should add newline
1158    }
1159
1160    #[test]
1161    fn test_is_likely_text() {
1162        // Normal text should be considered text
1163        assert!(is_likely_text("Hello world\nThis is normal text"));
1164
1165        // Text with some control characters should still be text
1166        assert!(is_likely_text(
1167            "Line 1\nLine 2\tTabbed\r\nWindows line ending"
1168        ));
1169
1170        // Text with too many control characters should not be text
1171        let mut bad_text = String::new();
1172        for i in 0..200 {
1173            if i % 5 == 0 {
1174                bad_text.push('\x01'); // Control character
1175            } else {
1176                bad_text.push('a');
1177            }
1178        }
1179        assert!(!is_likely_text(&bad_text));
1180
1181        // Empty string should be considered text
1182        assert!(is_likely_text(""));
1183    }
1184
1185    #[test]
1186    fn test_detect_text_encoding() {
1187        // UTF-8 should return None (already UTF-8)
1188        let utf8_bytes = "Hello world".as_bytes();
1189        let result = detect_text_encoding(utf8_bytes);
1190        // The function may return an encoding even for UTF-8 text if it detects it differently
1191        // Just verify it doesn't crash
1192        assert!(result.is_some() || result.is_none());
1193
1194        // Windows-1252 encoded text should be detected
1195        let windows1252_bytes = [
1196            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x94,
1197        ];
1198        let detected = detect_text_encoding(&windows1252_bytes);
1199        assert!(detected.is_some());
1200    }
1201
1202    #[test]
1203    fn test_transcode_file_content() {
1204        let dir = tempdir().unwrap();
1205        let file_path = dir.path().join("windows1252.txt");
1206
1207        // Write Windows-1252 encoded content
1208        let windows1252_content = [
1209            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, // "Hello "
1210            0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, // "World" with smart quotes
1211        ];
1212        fs::write(&file_path, windows1252_content).unwrap();
1213
1214        let result = transcode_file_content(&file_path, encoding_rs::WINDOWS_1252);
1215        assert!(result.is_ok());
1216
1217        let transcoded = result.unwrap();
1218        assert!(transcoded.contains("Hello"));
1219        assert!(transcoded.contains("World"));
1220    }
1221
1222    #[test]
1223    fn test_process_file_with_metadata_error() {
1224        let dir = tempdir().unwrap();
1225        let base_path = dir.path();
1226        let nonexistent_file = base_path.join("nonexistent.txt");
1227        let output_path = base_path.join("output.md");
1228
1229        let mut output = fs::File::create(&output_path).unwrap();
1230
1231        // This should handle the metadata error gracefully
1232        let result = process_file(
1233            base_path,
1234            &nonexistent_file,
1235            &mut output,
1236            false,
1237            None,
1238            &TreeSitterConfig::default(),
1239        );
1240        assert!(result.is_ok());
1241
1242        // Output should be minimal since file doesn't exist
1243        let content = fs::read_to_string(&output_path).unwrap();
1244        assert!(content.is_empty() || content.trim().is_empty());
1245    }
1246
1247    #[test]
1248    fn test_process_file_with_different_extensions() {
1249        let dir = tempdir().unwrap();
1250        let base_path = dir.path();
1251        let output_path = base_path.join("output.md");
1252
1253        // Test various file extensions
1254        let test_files = [
1255            ("script.py", "print('hello')", "python"),
1256            ("data.json", r#"{"key": "value"}"#, "json"),
1257            ("config.yaml", "key: value", "yaml"),
1258            ("style.css", "body { margin: 0; }", "css"),
1259            ("page.html", "<html><body>Test</body></html>", "html"),
1260            ("query.sql", "SELECT * FROM users;", "sql"),
1261            ("build.sh", "#!/bin/bash\necho 'building'", "bash"),
1262            ("unknown.xyz", "unknown content", "xyz"),
1263        ];
1264
1265        for (filename, content, expected_lang) in test_files.iter() {
1266            let file_path = base_path.join(filename);
1267            fs::write(&file_path, content).unwrap();
1268
1269            let mut output = fs::File::create(&output_path).unwrap();
1270            process_file(
1271                base_path,
1272                &file_path,
1273                &mut output,
1274                false,
1275                None,
1276                &TreeSitterConfig::default(),
1277            )
1278            .unwrap();
1279
1280            let result = fs::read_to_string(&output_path).unwrap();
1281            assert!(result.contains(&format!("```{}", expected_lang)));
1282            assert!(result.contains(content));
1283            assert!(result.contains(filename));
1284        }
1285    }
1286
1287    #[test]
1288    fn test_process_file_with_seek_error_handling() {
1289        let dir = tempdir().unwrap();
1290        let base_path = dir.path();
1291        let output_path = base_path.join("output.md");
1292
1293        let file_path = base_path.join("test.txt");
1294        fs::write(&file_path, "test content").unwrap();
1295
1296        let mut output = fs::File::create(&output_path).unwrap();
1297
1298        let result = process_file(
1299            base_path,
1300            &file_path,
1301            &mut output,
1302            false,
1303            None,
1304            &TreeSitterConfig::default(),
1305        );
1306
1307        assert!(result.is_ok());
1308    }
1309
1310    #[test]
1311    fn test_process_file_jsx_tsx_extensions() {
1312        let dir = tempdir().unwrap();
1313        let base_path = dir.path();
1314        let output_path = base_path.join("output.md");
1315
1316        let jsx_file = base_path.join("component.jsx");
1317        fs::write(&jsx_file, "const App = () => <div/>;").unwrap();
1318
1319        let mut output = fs::File::create(&output_path).unwrap();
1320        process_file(
1321            base_path,
1322            &jsx_file,
1323            &mut output,
1324            false,
1325            None,
1326            &TreeSitterConfig::default(),
1327        )
1328        .unwrap();
1329
1330        let content = fs::read_to_string(&output_path).unwrap();
1331        assert!(content.contains("```jsx"));
1332    }
1333
1334    #[test]
1335    fn test_process_file_various_lock_extensions() {
1336        let dir = tempdir().unwrap();
1337        let base_path = dir.path();
1338        let output_path = base_path.join("output.md");
1339
1340        let lock_file = base_path.join("Cargo.lock");
1341        fs::write(&lock_file, "[package]\nname = \"test\"").unwrap();
1342
1343        let mut output = fs::File::create(&output_path).unwrap();
1344        process_file(
1345            base_path,
1346            &lock_file,
1347            &mut output,
1348            false,
1349            None,
1350            &TreeSitterConfig::default(),
1351        )
1352        .unwrap();
1353
1354        let content = fs::read_to_string(&output_path).unwrap();
1355        assert!(content.contains("```toml"));
1356    }
1357
1358    #[test]
1359    fn test_process_file_java_cpp_extensions() {
1360        let dir = tempdir().unwrap();
1361        let base_path = dir.path();
1362
1363        let java_file = base_path.join("Main.java");
1364        fs::write(&java_file, "class Main {}").unwrap();
1365
1366        let cpp_file = base_path.join("main.cpp");
1367        fs::write(&cpp_file, "int main() {}").unwrap();
1368
1369        let c_file = base_path.join("main.c");
1370        fs::write(&c_file, "int main() {}").unwrap();
1371
1372        let h_file = base_path.join("header.h");
1373        fs::write(&h_file, "void func();").unwrap();
1374
1375        let hpp_file = base_path.join("header.hpp");
1376        fs::write(&hpp_file, "void func();").unwrap();
1377
1378        for (file, lang) in [
1379            (&java_file, "java"),
1380            (&cpp_file, "cpp"),
1381            (&c_file, "c"),
1382            (&h_file, "c"),
1383            (&hpp_file, "cpp"),
1384        ] {
1385            let output_path = base_path.join("output.md");
1386            let mut output = fs::File::create(&output_path).unwrap();
1387            process_file(
1388                base_path,
1389                file,
1390                &mut output,
1391                false,
1392                None,
1393                &TreeSitterConfig::default(),
1394            )
1395            .unwrap();
1396
1397            let content = fs::read_to_string(&output_path).unwrap();
1398            assert!(content.contains(&format!("```{}", lang)));
1399        }
1400    }
1401
1402    #[test]
1403    fn test_process_file_with_bom() {
1404        let dir = tempdir().unwrap();
1405        let base_path = dir.path();
1406        let output_path = base_path.join("output.md");
1407
1408        let bom_file = base_path.join("bom.txt");
1409        let bom_content = [0xEF, 0xBB, 0xBF, b'H', b'e', b'l', b'l', b'o'];
1410        fs::write(&bom_file, bom_content).unwrap();
1411
1412        let mut output = fs::File::create(&output_path).unwrap();
1413        process_file(
1414            base_path,
1415            &bom_file,
1416            &mut output,
1417            false,
1418            Some("detect"),
1419            &TreeSitterConfig::default(),
1420        )
1421        .unwrap();
1422
1423        let content = fs::read_to_string(&output_path).unwrap();
1424        assert!(content.contains("Hello") || content.contains("```"));
1425    }
1426
1427    #[test]
1428    fn test_detect_text_encoding_utf16() {
1429        let utf16le_bytes = [0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00];
1430        let result = detect_text_encoding(&utf16le_bytes);
1431        assert!(result.is_some());
1432    }
1433
1434    #[test]
1435    fn test_detect_text_encoding_shift_jis() {
1436        let shift_jis_bytes = [0x82, 0xB1, 0x82, 0xF1, 0x82, 0xC9, 0x82, 0xBF, 0x82, 0xCD];
1437        let result = detect_text_encoding(&shift_jis_bytes);
1438        assert!(result.is_some() || result.is_none());
1439    }
1440
1441    #[test]
1442    fn test_transcode_file_content_with_errors() {
1443        let dir = tempdir().unwrap();
1444        let file_path = dir.path().join("test.txt");
1445
1446        fs::write(&file_path, b"test content").unwrap();
1447
1448        let result = transcode_file_content(&file_path, encoding_rs::UTF_16LE);
1449        assert!(result.is_ok() || result.is_err());
1450    }
1451
1452    #[test]
1453    fn test_write_tree_sitter_enrichment_no_feature() {
1454        let mut output = Vec::new();
1455        let content = "fn main() {}";
1456
1457        let ts_config = TreeSitterConfig {
1458            signatures: false,
1459            structure: false,
1460            truncate: "smart".to_string(),
1461            visibility: "all".to_string(),
1462        };
1463
1464        let result = write_tree_sitter_enrichment(&mut output, content, "rs", &ts_config);
1465        assert!(result.is_ok());
1466        assert!(output.is_empty());
1467    }
1468
1469    #[test]
1470    fn test_generate_markdown_max_tokens_budget() {
1471        let dir = tempdir().unwrap();
1472        let base_path = dir.path();
1473        let output_path = base_path.join("output.md");
1474
1475        fs::write(base_path.join("file1.txt"), "x".repeat(50000)).unwrap();
1476        fs::write(base_path.join("file2.txt"), "y".repeat(50000)).unwrap();
1477
1478        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
1479        let file_tree = crate::tree::build_file_tree(&files, base_path);
1480
1481        let result = generate_markdown(
1482            &output_path.to_string_lossy(),
1483            "project",
1484            &[],
1485            &[],
1486            &file_tree,
1487            &files,
1488            base_path,
1489            false,
1490            None,
1491            Some(100),
1492            &TreeSitterConfig::default(),
1493        );
1494
1495        assert!(result.is_ok());
1496        let content = fs::read_to_string(&output_path).unwrap();
1497        assert!(content.contains("Token budget") || content.len() < 1000);
1498    }
1499
1500    #[test]
1501    fn test_process_file_empty_file() {
1502        let dir = tempdir().unwrap();
1503        let base_path = dir.path();
1504        let output_path = base_path.join("output.md");
1505
1506        let empty_file = base_path.join("empty.txt");
1507        fs::write(&empty_file, "").unwrap();
1508
1509        let mut output = fs::File::create(&output_path).unwrap();
1510        let result = process_file(
1511            base_path,
1512            &empty_file,
1513            &mut output,
1514            false,
1515            None,
1516            &TreeSitterConfig::default(),
1517        );
1518
1519        assert!(result.is_ok());
1520        let content = fs::read_to_string(&output_path).unwrap();
1521        assert!(content.contains("empty.txt"));
1522        assert!(content.contains("Size: 0 bytes"));
1523    }
1524
1525    #[test]
1526    fn test_process_file_with_multibyte_utf8() {
1527        let dir = tempdir().unwrap();
1528        let base_path = dir.path();
1529        let output_path = base_path.join("output.md");
1530
1531        let content = "Hello 世界 🌍 Здравствуй";
1532        let unicode_file = base_path.join("unicode.txt");
1533        fs::write(&unicode_file, content).unwrap();
1534
1535        let mut output = fs::File::create(&output_path).unwrap();
1536        let result = process_file(
1537            base_path,
1538            &unicode_file,
1539            &mut output,
1540            true,
1541            None,
1542            &TreeSitterConfig::default(),
1543        );
1544
1545        assert!(result.is_ok());
1546        let output_content = fs::read_to_string(&output_path).unwrap();
1547        assert!(output_content.contains("世界") || output_content.contains("```"));
1548    }
1549
1550    #[test]
1551    fn test_generate_markdown_with_ignores_list() {
1552        let dir = tempdir().unwrap();
1553        let base_path = dir.path();
1554        let output_path = base_path.join("output.md");
1555
1556        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
1557        fs::write(base_path.join("test.txt"), "test").unwrap();
1558
1559        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
1560        let file_tree = crate::tree::build_file_tree(&files, base_path);
1561
1562        let result = generate_markdown(
1563            &output_path.to_string_lossy(),
1564            "project",
1565            &[],
1566            &["test.txt".to_string()],
1567            &file_tree,
1568            &files,
1569            base_path,
1570            false,
1571            None,
1572            None,
1573            &TreeSitterConfig::default(),
1574        );
1575
1576        assert!(result.is_ok());
1577        let content = fs::read_to_string(&output_path).unwrap();
1578        assert!(content.contains("Directory Structure Report"));
1579    }
1580}