1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9use encoding_rs::{Encoding, UTF_8};
10
11#[cfg(feature = "parallel")]
12use crossbeam_channel::{Receiver, Sender, bounded};
13#[cfg(feature = "parallel")]
14use std::thread;
15
/// Options for the optional tree-sitter enrichment appended to each file section.
#[derive(Clone, Debug, Default)]
pub struct TreeSitterConfig {
    /// Emit extracted signatures. When set, `process_file` does not write the
    /// raw content of UTF-8 files and relies on the enrichment output instead.
    pub signatures: bool,
    /// Emit a structure summary for each file.
    pub structure: bool,
    /// Not read anywhere in this file; presumably a truncation mode consumed by
    /// other modules. TODO confirm against callers.
    pub truncate: String,
    /// Visibility filter applied to signatures. It is parsed when the enrichment
    /// is written; a value that fails to parse falls back to `Visibility::All`.
    pub visibility: String,
}
28
/// Writes the complete Markdown report for `files` to `output_path`.
///
/// The report is made of, in order:
/// 1. a title and a sentence naming the input directory (plus the extension
///    filters, when any were given) and the custom ignore patterns;
/// 2. a `Content hash:` line computed from every file's relative path and bytes;
/// 3. the `## File Tree Structure` section produced by `write_tree_to_file`;
/// 4. one section per file produced by [`process_file`], in the order of `files`.
///
/// When `max_tokens` is set, file sections stop being written once the running
/// estimate (bytes / 4) would exceed the budget. The first file is always
/// written, and a notice states how many files were omitted.
///
/// With the `parallel` feature the per-file sections are rendered on the rayon
/// pool and written by a dedicated thread that restores the original order.
/// Without it, files are processed one after another on the calling thread.
///
/// # Errors
/// Returns an error if the output directory or file cannot be created, if the
/// current directory cannot be determined (only when `input_dir` is `"."`), if
/// writing to the output fails, or, in the parallel build, if any file failed
/// to process or the writer thread panicked.
#[allow(clippy::too_many_arguments, unused_variables)]
pub fn generate_markdown(
    output_path: &str,
    input_dir: &str,
    filters: &[String],
    ignores: &[String],
    file_tree: &FileTree,
    files: &[DirEntry],
    base_path: &Path,
    line_numbers: bool,
    encoding_strategy: Option<&str>,
    max_tokens: Option<usize>,
    ts_config: &TreeSitterConfig,
) -> io::Result<()> {
    // Create the directory that will hold the report when it does not exist yet.
    if let Some(parent) = Path::new(output_path).parent()
        && !parent.exists()
    {
        fs::create_dir_all(parent)?;
    }

    let mut output = fs::File::create(output_path)?;

    // "." is replaced by the current directory's own name so the header reads
    // naturally; "project" is the last-resort label when no name is available.
    let input_dir_name = if input_dir == "." {
        let current_dir = std::env::current_dir()?;
        current_dir
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or_else(|| current_dir.to_str().unwrap_or("project"))
            .to_string()
    } else {
        input_dir.to_string()
    };

    writeln!(output, "# Directory Structure Report\n")?;

    if !filters.is_empty() {
        writeln!(
            output,
            "This document contains files from the `{}` directory with extensions: {}",
            input_dir_name,
            filters.join(", ")
        )?;
    } else {
        writeln!(
            output,
            "This document contains all files from the `{}` directory, optimized for LLM consumption.",
            input_dir_name
        )?;
    }

    if !ignores.is_empty() {
        writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
    }

    // Hash every file as: relative path (with `/` separators), NUL, file
    // bytes, NUL. The NUL separators keep adjacent fields from running
    // together. Files that cannot be read contribute only their path.
    let mut content_hasher = xxhash_rust::xxh3::Xxh3::new();
    for entry in files {
        let rel_path = entry.path().strip_prefix(base_path).unwrap_or(entry.path());
        let normalized = rel_path.to_string_lossy().replace('\\', "/");
        content_hasher.update(normalized.as_bytes());
        content_hasher.update(b"\0");
        if let Ok(bytes) = std::fs::read(entry.path()) {
            content_hasher.update(&bytes);
        }
        content_hasher.update(b"\0");
    }
    writeln!(output, "Content hash: {:016x}", content_hasher.digest())?;
    writeln!(output)?;

    writeln!(output, "## File Tree Structure\n")?;

    // NOTE(review): the trailing `0` is presumably the starting depth/indent
    // level; confirm against `write_tree_to_file`.
    write_tree_to_file(&mut output, file_tree, 0)?;

    writeln!(output)?;

    #[cfg(feature = "parallel")]
    {
        use rayon::prelude::*;

        // Each message is the file's index in `files` plus its rendered section
        // (or the error raised while rendering it).
        type ChunkResult = (usize, io::Result<Vec<u8>>);
        // The channel is bounded so workers block instead of queueing an
        // unbounded number of rendered sections ahead of the writer.
        let (sender, receiver): (Sender<ChunkResult>, Receiver<ChunkResult>) =
            bounded(num_cpus::get() * 2);
        let writer_handle = {
            // The output file is moved into the writer thread.
            let mut output = output;
            let total_files = files.len();
            let budget = max_tokens;

            thread::spawn(move || -> io::Result<()> {
                // Sections that arrived out of order, keyed by file index.
                let mut completed_chunks = std::collections::BTreeMap::new();
                // Index of the next section that must be written.
                let mut next_index = 0;
                let mut errors = Vec::new();
                let mut tokens_used: usize = 0;
                let mut budget_exceeded = false;

                while next_index < total_files {
                    match receiver.recv() {
                        Ok((index, chunk_result)) => {
                            completed_chunks.insert(index, chunk_result);

                            // Flush every section that is now contiguous with
                            // what has already been written.
                            while let Some(chunk_result) = completed_chunks.remove(&next_index) {
                                // Past the budget: keep draining so the loop
                                // reaches `total_files`, but write nothing.
                                if budget_exceeded {
                                    next_index += 1;
                                    continue;
                                }

                                match chunk_result {
                                    Ok(buf) => {
                                        // Token estimate: rendered bytes / 4.
                                        let chunk_tokens = buf.len() / 4;

                                        // `tokens_used > 0` guarantees the first
                                        // section is written even when it alone
                                        // exceeds the budget.
                                        if let Some(max) = budget
                                            && tokens_used + chunk_tokens > max
                                            && tokens_used > 0
                                        {
                                            let remaining = total_files - next_index;
                                            let notice = format!(
                                                "---\n\n_⚠️ Token budget ({}) reached. {} remaining files omitted._\n\n",
                                                max, remaining
                                            );
                                            if let Err(e) = output.write_all(notice.as_bytes()) {
                                                errors.push(format!(
                                                    "Failed to write truncation notice: {}",
                                                    e
                                                ));
                                            }
                                            budget_exceeded = true;
                                            next_index += 1;
                                            continue;
                                        }

                                        tokens_used += chunk_tokens;
                                        if let Err(e) = output.write_all(&buf) {
                                            errors.push(format!(
                                                "Failed to write output for file index {}: {}",
                                                next_index, e
                                            ));
                                        }
                                    }
                                    Err(e) => {
                                        errors.push(format!(
                                            "Failed to process file index {}: {}",
                                            next_index, e
                                        ));
                                    }
                                }
                                next_index += 1;
                            }
                        }
                        // `recv` fails once every sender has been dropped.
                        Err(_) => break,
                    }
                }

                // Errors are collected rather than returned immediately so one
                // failure does not stop the remaining sections; they are
                // reported together here.
                if !errors.is_empty() {
                    error!(
                        "Encountered {} errors during parallel processing:",
                        errors.len()
                    );
                    for err in &errors {
                        error!(" {}", err);
                    }
                    return Err(std::io::Error::other(format!(
                        "Failed to process {} files: {}",
                        errors.len(),
                        errors.join("; ")
                    )));
                }

                Ok(())
            })
        };

        let ts_config_clone = ts_config.clone();
        // Render every file into its own buffer on the rayon pool and hand the
        // result to the writer together with the file's index.
        files.par_iter().enumerate().for_each(|(index, entry)| {
            let mut buf = Vec::new();
            let result = process_file(
                base_path,
                entry.path(),
                &mut buf,
                line_numbers,
                encoding_strategy,
                &ts_config_clone,
            )
            .map(|_| buf);

            // A send error is deliberately ignored here.
            let _ = sender.send((index, result));
        });

        // Dropping the sender makes the writer's `recv` fail instead of
        // blocking forever if it is still waiting.
        drop(sender);

        // First `?`: the join result (mapped panic); second `?`: the writer's own result.
        writer_handle
            .join()
            .map_err(|_| std::io::Error::other("Writer thread panicked"))??;
    }

    #[cfg(not(feature = "parallel"))]
    {
        let mut tokens_used: usize = 0;

        for (idx, entry) in files.iter().enumerate() {
            // Token estimate: on-disk size / 4, or 0 when the metadata cannot
            // be read. Unlike the parallel path, this is computed before rendering.
            let file_size = std::fs::metadata(entry.path())
                .map(|m| m.len())
                .unwrap_or(0);
            let estimated_file_tokens = (file_size as usize) / 4;

            if let Some(budget) = max_tokens {
                // `tokens_used > 0` guarantees the first file is always written.
                if tokens_used + estimated_file_tokens > budget && tokens_used > 0 {
                    let remaining = files.len() - idx;
                    writeln!(output, "---\n")?;
                    writeln!(
                        output,
                        "_⚠️ Token budget ({}) reached. {} remaining files omitted._\n",
                        budget, remaining
                    )?;
                    break;
                }
            }

            tokens_used += estimated_file_tokens;
            process_file(
                base_path,
                entry.path(),
                &mut output,
                line_numbers,
                encoding_strategy,
                ts_config,
            )?;
        }
    }

    Ok(())
}
284
285pub fn process_file(
287 base_path: &Path,
288 file_path: &Path,
289 output: &mut impl Write,
290 line_numbers: bool,
291 encoding_strategy: Option<&str>,
292 ts_config: &TreeSitterConfig,
293) -> io::Result<()> {
294 let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
295 info!("Processing file: {}", relative_path.display());
296
297 let metadata = match fs::metadata(file_path) {
298 Ok(meta) => meta,
299 Err(e) => {
300 error!(
301 "Failed to get metadata for {}: {}",
302 relative_path.display(),
303 e
304 );
305 return Ok(());
306 }
307 };
308
309 let modified_time = metadata
310 .modified()
311 .ok()
312 .map(|time| {
313 let system_time: chrono::DateTime<Utc> = time.into();
314 system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
315 })
316 .unwrap_or_else(|| "Unknown".to_string());
317
318 writeln!(output)?;
319 writeln!(output, "### File: `{}`", relative_path.display())?;
320
321 writeln!(output)?;
322
323 writeln!(output, "- Size: {} bytes", metadata.len())?;
324 writeln!(output, "- Modified: {}", modified_time)?;
325 writeln!(output)?;
326
327 let extension = file_path
329 .extension()
330 .and_then(|s| s.to_str())
331 .unwrap_or("text");
332 let language = match extension {
333 "rs" => "rust",
334 "js" => "javascript",
335 "ts" => "typescript",
336 "jsx" => "jsx",
337 "tsx" => "tsx",
338 "json" => "json",
339 "toml" => "toml",
340 "md" => "markdown",
341 "yaml" | "yml" => "yaml",
342 "html" => "html",
343 "css" => "css",
344 "py" => "python",
345 "java" => "java",
346 "cpp" => "cpp",
347 "c" => "c",
348 "h" => "c",
349 "hpp" => "cpp",
350 "sql" => "sql",
351 "sh" => "bash",
352 "xml" => "xml",
353 "lock" => "toml",
354 _ => extension,
355 };
356
357 match fs::File::open(file_path) {
359 Ok(mut file) => {
360 let mut sniff = [0u8; 8192];
361 let n = match file.read(&mut sniff) {
362 Ok(n) => n,
363 Err(e) => {
364 warn!(
365 "Could not read file {}: {}. Skipping content.",
366 relative_path.display(),
367 e
368 );
369
370 writeln!(output, "```text")?;
371
372 writeln!(
373 output,
374 "<Could not read file content (e.g., binary file or permission error)>"
375 )?;
376
377 writeln!(output, "```")?;
378
379 return Ok(());
380 }
381 };
382 let slice = &sniff[..n];
383
384 let check_len = if n == sniff.len() {
388 let mut end = n;
390 while end > 0 && end > n.saturating_sub(4) && sniff[end - 1] & 0xC0 == 0x80 {
391 end -= 1; }
393 if end > 0 && end < n {
395 let leading = sniff[end - 1];
396 let expected_len = if leading & 0xE0 == 0xC0 {
397 2
398 } else if leading & 0xF0 == 0xE0 {
399 3
400 } else if leading & 0xF8 == 0xF0 {
401 4
402 } else {
403 1
404 };
405 if end - 1 + expected_len > n {
406 end - 1 } else {
408 n
409 }
410 } else {
411 n
412 }
413 } else {
414 n };
416
417 let is_utf8 = std::str::from_utf8(&sniff[..check_len]).is_ok();
419
420 if is_utf8 && !slice.contains(&0) {
421 } else {
423 let (encoding, _consumed) =
426 encoding_rs::Encoding::for_bom(slice).unwrap_or((encoding_rs::UTF_8, 0));
427
428 let detected_encoding = if encoding == UTF_8 {
430 detect_text_encoding(slice)
432 } else {
433 Some(encoding)
434 };
435
436 match detected_encoding {
437 Some(enc) if enc != UTF_8 => {
438 let strategy = encoding_strategy.unwrap_or("detect");
439 match strategy {
440 "strict" | "skip" => {
441 warn!(
443 "Skipping non-UTF-8 file {} (encoding: {}, strategy: {})",
444 relative_path.display(),
445 enc.name(),
446 strategy
447 );
448 }
449 _ => {
450 match transcode_file_content(file_path, enc) {
452 Ok(transcoded_content) => {
453 info!(
454 "Successfully transcoded {} from {} to UTF-8",
455 relative_path.display(),
456 enc.name()
457 );
458 write_text_content(
459 output,
460 &transcoded_content,
461 language,
462 line_numbers,
463 )?;
464 return Ok(());
465 }
466 Err(e) => {
467 warn!(
468 "Failed to transcode {} from {}: {}. Treating as binary.",
469 relative_path.display(),
470 enc.name(),
471 e
472 );
473 }
474 }
475 }
476 }
477 }
478 _ => {
479 if slice.contains(&0) {
481 warn!(
482 "Detected binary file {} (contains null bytes). Skipping content.",
483 relative_path.display()
484 );
485 } else {
486 warn!(
487 "Could not determine encoding for {}. Treating as binary.",
488 relative_path.display()
489 );
490 }
491 }
492 }
493
494 writeln!(output, "```text")?;
496 writeln!(
497 output,
498 "<Binary file or unsupported encoding: {} bytes>",
499 metadata.len()
500 )?;
501 writeln!(output, "```")?;
502 return Ok(());
503 }
504
505 if let Err(e) = file.seek(SeekFrom::Start(0)) {
507 warn!(
508 "Could not reset file cursor for {}: {}. Skipping content.",
509 relative_path.display(),
510 e
511 );
512 writeln!(output, "```text")?;
513 writeln!(
514 output,
515 "<Could not read file content (e.g., binary file or permission error)>"
516 )?;
517 writeln!(output, "```")?;
518 return Ok(());
519 }
520
521 let content = match std::fs::read_to_string(file_path) {
523 Ok(content) => content,
524 Err(e) => {
525 warn!(
526 "Error reading file {}: {}. Output may be truncated.",
527 relative_path.display(),
528 e
529 );
530 writeln!(output, "```text")?;
531 writeln!(output, "<Error reading file content>")?;
532 writeln!(output, "```")?;
533 return Ok(());
534 }
535 };
536 let signatures_only = ts_config.signatures;
538
539 if !signatures_only {
540 write_text_content(output, &content, language, line_numbers)?;
545 }
546
547 write_tree_sitter_enrichment(output, &content, extension, ts_config)?;
549 }
550 Err(e) => {
551 warn!(
552 "Could not open file {}: {}. Skipping content.",
553 relative_path.display(),
554 e
555 );
556 writeln!(output, "```text")?;
557 writeln!(
558 output,
559 "<Could not read file content (e.g., binary file or permission error)>"
560 )?;
561 writeln!(output, "```")?;
562 }
563 }
564
565 Ok(())
566}
567
568#[allow(unused_variables)]
570fn write_tree_sitter_enrichment(
571 output: &mut impl Write,
572 content: &str,
573 extension: &str,
574 ts_config: &TreeSitterConfig,
575) -> io::Result<()> {
576 if !ts_config.signatures && !ts_config.structure {
577 return Ok(());
578 }
579
580 #[cfg(feature = "tree-sitter-base")]
581 {
582 use crate::tree_sitter::language_support::Visibility;
583
584 let vis_filter: Visibility = ts_config.visibility.parse().unwrap_or(Visibility::All);
585
586 if ts_config.structure
587 && let Some(structure) =
588 crate::tree_sitter::extract_structure_for_file(content, extension)
589 {
590 let summary = crate::tree_sitter::structure::format_structure_as_markdown(&structure);
591 if !summary.is_empty() {
592 writeln!(output)?;
593 write!(output, "{}", summary)?;
594 }
595 }
596
597 if ts_config.signatures
598 && let Some(signatures) =
599 crate::tree_sitter::extract_signatures_for_file(content, extension, vis_filter)
600 && !signatures.is_empty()
601 {
602 let language = match extension {
603 "rs" => "rust",
604 "js" | "mjs" | "cjs" => "javascript",
605 "ts" | "tsx" | "mts" | "cts" => "typescript",
606 "py" | "pyw" => "python",
607 "go" => "go",
608 "java" => "java",
609 "c" | "h" => "c",
610 "cpp" | "cxx" | "cc" | "hpp" | "hxx" | "hh" => "cpp",
611 _ => extension,
612 };
613 writeln!(output)?;
614 writeln!(output, "**Signatures:**")?;
615 writeln!(output)?;
616 let formatted = crate::tree_sitter::signatures::format_signatures_as_markdown(
617 &signatures,
618 language,
619 );
620 write!(output, "{}", formatted)?;
621 }
622 }
623
624 #[cfg(not(feature = "tree-sitter-base"))]
625 {
626 }
629
630 Ok(())
631}
632
633fn detect_text_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
635 let encodings = [
637 encoding_rs::WINDOWS_1252,
638 encoding_rs::UTF_16LE,
639 encoding_rs::UTF_16BE,
640 encoding_rs::SHIFT_JIS,
641 ];
642
643 for encoding in &encodings {
644 let (decoded, _, had_errors) = encoding.decode(bytes);
645 if !had_errors && is_likely_text(&decoded) {
646 return Some(encoding);
647 }
648 }
649
650 None
651}
652
/// Heuristically decides whether decoded `content` looks like text.
///
/// Control characters other than `\n`, `\r` and `\t` are counted as
/// suspicious. The content is rejected when more than 5% of its characters are
/// suspicious. The check is also applied after every character once more than
/// 100 characters have been seen, so a bad beginning rejects the content
/// without scanning the rest. Empty content counts as text.
fn is_likely_text(content: &str) -> bool {
    let mut suspicious = 0;
    let mut seen = 0;

    for ch in content.chars() {
        seen += 1;
        if ch.is_control() && !matches!(ch, '\n' | '\r' | '\t') {
            suspicious += 1;
        }

        // Early exit: `suspicious * 20 > seen` is "more than 5%".
        if seen > 100 && suspicious * 20 > seen {
            return false;
        }
    }

    seen == 0 || suspicious * 20 <= seen
}
677
678fn transcode_file_content(file_path: &Path, encoding: &'static Encoding) -> io::Result<String> {
680 let bytes = std::fs::read(file_path)?;
681 let (decoded, _, had_errors) = encoding.decode(&bytes);
682
683 if had_errors {
684 return Err(io::Error::new(
685 io::ErrorKind::InvalidData,
686 format!("Failed to decode file with encoding {}", encoding.name()),
687 ));
688 }
689
690 Ok(decoded.into_owned())
691}
692
/// Writes `content` to `output` as a fenced Markdown code block tagged with
/// `language`.
///
/// With `line_numbers`, every line is prefixed with its right-aligned 1-based
/// number and ` | `. Without it, the content is written verbatim and a
/// trailing newline is added when missing, so the closing fence always starts
/// on its own line.
///
/// The fence is three backticks unless the verbatim content itself contains a
/// run of three or more backticks (for example a Markdown file with its own
/// code blocks). In that case the fence is made one backtick longer than the
/// longest run, so the embedded fence cannot close the block early.
///
/// # Errors
/// Returns any error raised while writing to `output`.
fn write_text_content(
    output: &mut impl Write,
    content: &str,
    language: &str,
    line_numbers: bool,
) -> io::Result<()> {
    // Numbered lines start with the number prefix, so they can never be read
    // as a closing fence; only verbatim content needs the longer fence.
    let longest_run = if line_numbers {
        0
    } else {
        longest_backtick_run(content)
    };
    let fence = "`".repeat((longest_run + 1).max(3));

    writeln!(output, "{}{}", fence, language)?;

    if line_numbers {
        for (i, line) in content.lines().enumerate() {
            writeln!(output, "{:>4} | {}", i + 1, line)?;
        }
    } else {
        output.write_all(content.as_bytes())?;
        if !content.ends_with('\n') {
            writeln!(output)?;
        }
    }

    writeln!(output, "{}", fence)?;
    Ok(())
}

/// Returns the length of the longest run of consecutive backticks in `content`.
fn longest_backtick_run(content: &str) -> usize {
    content
        .split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0)
}
716
/// Unit tests for the Markdown rendering helpers in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Runs `process_file` with a default `TreeSitterConfig` and returns the
    /// Markdown it produced.
    fn render_file(
        base_path: &std::path::Path,
        file_path: &std::path::Path,
        line_numbers: bool,
        encoding_strategy: Option<&str>,
    ) -> String {
        let mut rendered = Vec::new();
        process_file(
            base_path,
            file_path,
            &mut rendered,
            line_numbers,
            encoding_strategy,
            &TreeSitterConfig::default(),
        )
        .unwrap();
        String::from_utf8(rendered).unwrap()
    }

    #[test]
    fn test_code_block_formatting() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("test.rs");

        fs::write(
            &file_path,
            "fn main() {\n println!(\"Hello, world!\");\n}",
        )
        .unwrap();

        let content = render_file(base_path, &file_path, false, None);

        assert!(content.contains("```rust"));
        // An opening and a closing fence.
        assert!(content.matches("```").count() >= 2);
    }

    #[test]
    fn test_markdown_file_formatting() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("README.md");

        fs::write(&file_path, "# Test\n\nThis is a test markdown file.").unwrap();

        let content = render_file(base_path, &file_path, false, None);

        assert!(
            content.contains("```markdown"),
            "Content should contain '```markdown' but was: {}",
            content
        );
        let code_block_markers = content.matches("```").count();

        assert!(
            code_block_markers >= 2,
            "Expected at least 2 code block markers, found {}",
            code_block_markers
        );
    }

    #[test]
    fn test_line_numbered_code_blocks() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("lib.rs");

        fs::write(
            &file_path,
            "fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\nfn main() {\n println!(\"{}\", add(1, 2));\n}\n",
        )
        .unwrap();

        let content = render_file(base_path, &file_path, true, None);

        assert!(content.contains("```rust"));
        assert!(content.contains(" 1 | "));
        assert!(content.contains(" 2 | "));

        // Every source line must appear exactly once as a numbered line.
        let numbered_lines = content
            .lines()
            .filter(|l| {
                l.trim_start()
                    .chars()
                    .next()
                    .map(|c| c.is_ascii_digit())
                    .unwrap_or(false)
                    && l.contains(" | ")
            })
            .count();
        let original_line_count = fs::read_to_string(&file_path).unwrap().lines().count();
        assert_eq!(numbered_lines, original_line_count);

        assert!(content.contains("```"));
    }

    #[test]
    fn test_binary_file_handling() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("image.bin");

        // PNG-like header followed by NUL-heavy payload.
        let bytes = vec![
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
            0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];
        fs::write(&file_path, bytes).unwrap();

        let content = render_file(base_path, &file_path, false, None);

        assert!(content.contains("```text"));
        assert!(content.contains("<Binary file or unsupported encoding:"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    #[test]
    fn test_encoding_detection_and_transcoding() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();

        // "Hello “World”\n" in Windows-1252 (0x93 / 0x94 are curly quotes).
        let windows1252_content = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, 0x0A,
        ];
        let file_path = base_path.join("windows1252.txt");
        fs::write(&file_path, windows1252_content).unwrap();

        let content = render_file(base_path, &file_path, false, Some("detect"));

        assert!(content.contains("Hello"));
        assert!(content.contains("World"));
        assert!(content.contains("```txt"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    #[test]
    fn test_encoding_strategy_strict() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();

        // UTF-16LE BOM followed by "A".
        let non_utf8_content = [0xFF, 0xFE, 0x41, 0x00];
        let file_path = base_path.join("utf16.txt");
        fs::write(&file_path, non_utf8_content).unwrap();

        let content = render_file(base_path, &file_path, false, Some("strict"));

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    #[test]
    fn test_encoding_strategy_skip() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();

        // UTF-16LE BOM followed by "Hi".
        let utf16_content = [0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00];
        let file_path = base_path.join("utf16.txt");
        fs::write(&file_path, utf16_content).unwrap();

        let content = render_file(base_path, &file_path, false, Some("skip"));

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));
    }

    #[test]
    fn test_generate_markdown_with_current_directory() {
        /// Restores the saved working directory when dropped, so a panic in
        /// the code under test cannot leave the process in the temp directory.
        struct CwdGuard(std::path::PathBuf);

        impl Drop for CwdGuard {
            fn drop(&mut self) {
                let _ = std::env::set_current_dir(&self.0);
            }
        }

        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("test.md");

        fs::write(base_path.join("readme.txt"), "Hello world").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let cwd_guard = CwdGuard(std::env::current_dir().unwrap());
        std::env::set_current_dir(base_path).unwrap();

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            ".",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
            None,
            &TreeSitterConfig::default(),
        );

        // Leave the temp directory before asserting (and before it is deleted).
        drop(cwd_guard);

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
    }

    #[test]
    fn test_generate_markdown_creates_output_directory() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nested_output = base_path.join("nested").join("deep").join("output.md");

        fs::write(base_path.join("test.txt"), "content").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &nested_output.to_string_lossy(),
            "test_dir",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
            None,
            &TreeSitterConfig::default(),
        );

        assert!(result.is_ok());
        assert!(nested_output.exists());
        assert!(nested_output.parent().unwrap().exists());
    }

    #[test]
    fn test_generate_markdown_with_filters_and_ignores() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("filtered.md");

        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
        fs::write(base_path.join("config.toml"), "[package]").unwrap();
        fs::write(base_path.join("readme.md"), "# README").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            "project",
            &["rs".to_string(), "toml".to_string()],
            &["readme.md".to_string()],
            &file_tree,
            &files,
            base_path,
            true,
            Some("strict"),
            None,
            &TreeSitterConfig::default(),
        );

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
        assert!(content.contains("main.rs") || content.contains("config.toml"));
    }

    #[test]
    fn test_write_text_content_with_line_numbers() {
        let mut output = Vec::new();
        let content = "line one\nline two\nline three";

        write_text_content(&mut output, content, "rust", true).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```rust"));
        assert!(result.contains(" 1 | line one"));
        assert!(result.contains(" 2 | line two"));
        assert!(result.contains(" 3 | line three"));
        assert!(result.contains("```"));
    }

    #[test]
    fn test_write_text_content_without_line_numbers() {
        let mut output = Vec::new();
        let content = "function test() {\n return true;\n}";

        write_text_content(&mut output, content, "javascript", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```javascript"));
        assert!(result.contains("function test() {"));
        assert!(result.contains(" return true;"));
        assert!(result.contains("```"));
        // No line-number gutter in this mode.
        assert!(!result.contains(" | "));
    }

    #[test]
    fn test_write_text_content_without_trailing_newline() {
        let mut output = Vec::new();
        let content = "no newline at end";
        write_text_content(&mut output, content, "text", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```text"));
        assert!(result.contains("no newline at end"));
        // The closing fence must sit on its own line.
        assert!(result.ends_with("```\n"));
    }

    #[test]
    fn test_is_likely_text() {
        assert!(is_likely_text("Hello world\nThis is normal text"));

        assert!(is_likely_text(
            "Line 1\nLine 2\tTabbed\r\nWindows line ending"
        ));

        // 20% control characters is far above the 5% threshold.
        let mut bad_text = String::new();
        for i in 0..200 {
            if i % 5 == 0 {
                bad_text.push('\x01');
            } else {
                bad_text.push('a');
            }
        }
        assert!(!is_likely_text(&bad_text));

        assert!(is_likely_text(""));
    }

    #[test]
    fn test_detect_text_encoding() {
        // Plain ASCII decodes cleanly with the first candidate, Windows-1252.
        let ascii_bytes = "Hello world".as_bytes();
        assert_eq!(
            detect_text_encoding(ascii_bytes),
            Some(encoding_rs::WINDOWS_1252)
        );

        let windows1252_bytes = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x94,
        ];
        assert_eq!(
            detect_text_encoding(&windows1252_bytes),
            Some(encoding_rs::WINDOWS_1252)
        );
    }

    #[test]
    fn test_transcode_file_content() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("windows1252.txt");

        let windows1252_content = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94,
        ];
        fs::write(&file_path, windows1252_content).unwrap();

        let result = transcode_file_content(&file_path, encoding_rs::WINDOWS_1252);
        assert!(result.is_ok());

        let transcoded = result.unwrap();
        assert!(transcoded.contains("Hello"));
        assert!(transcoded.contains("World"));
    }

    #[test]
    fn test_process_file_with_metadata_error() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nonexistent_file = base_path.join("nonexistent.txt");

        let mut rendered = Vec::new();
        let result = process_file(
            base_path,
            &nonexistent_file,
            &mut rendered,
            false,
            None,
            &TreeSitterConfig::default(),
        );
        assert!(result.is_ok());

        // A file without metadata is skipped, so nothing meaningful is written.
        let content = String::from_utf8(rendered).unwrap();
        assert!(content.trim().is_empty());
    }

    #[test]
    fn test_process_file_with_different_extensions() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();

        // (file name, content, expected fence language)
        let test_files = [
            ("script.py", "print('hello')", "python"),
            ("data.json", r#"{"key": "value"}"#, "json"),
            ("config.yaml", "key: value", "yaml"),
            ("style.css", "body { margin: 0; }", "css"),
            ("page.html", "<html><body>Test</body></html>", "html"),
            ("query.sql", "SELECT * FROM users;", "sql"),
            ("build.sh", "#!/bin/bash\necho 'building'", "bash"),
            ("unknown.xyz", "unknown content", "xyz"),
        ];

        for (filename, content, expected_lang) in test_files {
            let file_path = base_path.join(filename);
            fs::write(&file_path, content).unwrap();

            let result = render_file(base_path, &file_path, false, None);

            assert!(result.contains(&format!("```{}", expected_lang)));
            assert!(result.contains(content));
            assert!(result.contains(filename));
        }
    }
}