1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9use encoding_rs::{Encoding, UTF_8};
10
11#[cfg(feature = "parallel")]
12use crossbeam_channel::{Receiver, Sender, bounded};
13#[cfg(feature = "parallel")]
14use std::thread;
15
/// Options controlling the optional tree-sitter enrichment of each rendered file.
///
/// The [`Default`] value disables both enrichments and leaves the string options empty.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TreeSitterConfig {
    /// Emit extracted signatures for each file. When set and the file's extension is
    /// supported by the tree-sitter module, `process_file` omits the raw file content
    /// and only the enrichment is written.
    pub signatures: bool,
    /// Emit a structure summary for each file the tree-sitter module can analyse.
    pub structure: bool,
    /// Truncation option. It is not read anywhere in this module.
    // NOTE(review): confirm which consumer interprets this value.
    pub truncate: String,
    /// Visibility filter for extracted signatures. The string is parsed into the
    /// tree-sitter `Visibility` type; values that fail to parse fall back to
    /// `Visibility::All`.
    pub visibility: String,
}
28
29#[allow(clippy::too_many_arguments, unused_variables)]
31pub fn generate_markdown(
32 output_path: &str,
33 input_dir: &str,
34 filters: &[String],
35 ignores: &[String],
36 file_tree: &FileTree,
37 files: &[DirEntry],
38 base_path: &Path,
39 line_numbers: bool,
40 encoding_strategy: Option<&str>,
41 max_tokens: Option<usize>,
42 ts_config: &TreeSitterConfig,
43) -> io::Result<()> {
44 if let Some(parent) = Path::new(output_path).parent()
45 && !parent.exists()
46 {
47 fs::create_dir_all(parent)?;
48 }
49
50 let mut output = fs::File::create(output_path)?;
51
52 let input_dir_name = if input_dir == "." {
53 let current_dir = std::env::current_dir()?;
54 current_dir
55 .file_name()
56 .and_then(|n| n.to_str())
57 .unwrap_or_else(|| current_dir.to_str().unwrap_or("project"))
58 .to_string()
59 } else {
60 input_dir.to_string()
61 };
62
63 writeln!(output, "# Directory Structure Report\n")?;
65
66 if !filters.is_empty() {
67 writeln!(
68 output,
69 "This document contains files from the `{}` directory with extensions: {}",
70 input_dir_name,
71 filters.join(", ")
72 )?;
73 } else {
74 writeln!(
75 output,
76 "This document contains all files from the `{}` directory, optimized for LLM consumption.",
77 input_dir_name
78 )?;
79 }
80
81 if !ignores.is_empty() {
82 writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
83 }
84
85 let mut content_hasher = xxhash_rust::xxh3::Xxh3::new();
89 for entry in files {
90 let rel_path = entry.path().strip_prefix(base_path).unwrap_or(entry.path());
94 let normalized = rel_path.to_string_lossy().replace('\\', "/");
95 content_hasher.update(normalized.as_bytes());
96 content_hasher.update(b"\0");
98 if let Ok(bytes) = std::fs::read(entry.path()) {
100 content_hasher.update(&bytes);
101 }
102 content_hasher.update(b"\0");
103 }
104 writeln!(output, "Content hash: {:016x}", content_hasher.digest())?;
105 writeln!(output)?;
106
107 writeln!(output, "## File Tree Structure\n")?;
110
111 write_tree_to_file(&mut output, file_tree, 0)?;
112
113 writeln!(output)?;
114
115 #[cfg(feature = "parallel")]
119 {
120 use rayon::prelude::*;
121
122 type ChunkResult = (usize, io::Result<Vec<u8>>);
124 let (sender, receiver): (Sender<ChunkResult>, Receiver<ChunkResult>) =
125 bounded(num_cpus::get() * 2); let writer_handle = {
128 let mut output = output;
129 let total_files = files.len();
130 let budget = max_tokens;
131
132 thread::spawn(move || -> io::Result<()> {
133 let mut completed_chunks = std::collections::BTreeMap::new();
134 let mut next_index = 0;
135 let mut errors = Vec::new();
136 let mut tokens_used: usize = 0;
137 let mut budget_exceeded = false;
138
139 while next_index < total_files {
141 match receiver.recv() {
142 Ok((index, chunk_result)) => {
143 completed_chunks.insert(index, chunk_result);
144
145 while let Some(chunk_result) = completed_chunks.remove(&next_index) {
147 if budget_exceeded {
148 next_index += 1;
150 continue;
151 }
152
153 match chunk_result {
154 Ok(buf) => {
155 let chunk_tokens = buf.len() / 4;
157
158 if let Some(max) = budget
159 && tokens_used + chunk_tokens > max
160 && tokens_used > 0
161 {
162 let remaining = total_files - next_index;
163 let notice = format!(
164 "---\n\n_⚠️ Token budget ({}) reached. {} remaining files omitted._\n\n",
165 max, remaining
166 );
167 if let Err(e) = output.write_all(notice.as_bytes()) {
168 errors.push(format!(
169 "Failed to write truncation notice: {}",
170 e
171 ));
172 }
173 budget_exceeded = true;
174 next_index += 1;
175 continue;
176 }
177
178 tokens_used += chunk_tokens;
179 if let Err(e) = output.write_all(&buf) {
180 errors.push(format!(
181 "Failed to write output for file index {}: {}",
182 next_index, e
183 ));
184 }
185 }
186 Err(e) => {
187 errors.push(format!(
188 "Failed to process file index {}: {}",
189 next_index, e
190 ));
191 }
192 }
193 next_index += 1;
194 }
195 }
196 Err(_) => break, }
198 }
199
200 if !errors.is_empty() {
201 error!(
202 "Encountered {} errors during parallel processing:",
203 errors.len()
204 );
205 for err in &errors {
206 error!(" {}", err);
207 }
208 return Err(std::io::Error::other(format!(
209 "Failed to process {} files: {}",
210 errors.len(),
211 errors.join("; ")
212 )));
213 }
214
215 Ok(())
216 })
217 };
218
219 let ts_config_clone = ts_config.clone();
221 files.par_iter().enumerate().for_each(|(index, entry)| {
222 let mut buf = Vec::new();
223 let result = process_file(
224 base_path,
225 entry.path(),
226 &mut buf,
227 line_numbers,
228 encoding_strategy,
229 &ts_config_clone,
230 )
231 .map(|_| buf);
232
233 let _ = sender.send((index, result));
235 });
236
237 drop(sender);
239
240 writer_handle
242 .join()
243 .map_err(|_| std::io::Error::other("Writer thread panicked"))??;
244 }
245
246 #[cfg(not(feature = "parallel"))]
247 {
248 let mut tokens_used: usize = 0;
249
250 for (idx, entry) in files.iter().enumerate() {
251 let file_size = std::fs::metadata(entry.path())
253 .map(|m| m.len())
254 .unwrap_or(0);
255 let estimated_file_tokens = (file_size as usize) / 4;
256
257 if let Some(budget) = max_tokens {
258 if tokens_used + estimated_file_tokens > budget && tokens_used > 0 {
259 let remaining = files.len() - idx;
260 writeln!(output, "---\n")?;
261 writeln!(
262 output,
263 "_⚠️ Token budget ({}) reached. {} remaining files omitted._\n",
264 budget, remaining
265 )?;
266 break;
267 }
268 }
269
270 tokens_used += estimated_file_tokens;
271 process_file(
272 base_path,
273 entry.path(),
274 &mut output,
275 line_numbers,
276 encoding_strategy,
277 ts_config,
278 )?;
279 }
280 }
281
282 Ok(())
283}
284
285pub fn process_file(
287 base_path: &Path,
288 file_path: &Path,
289 output: &mut impl Write,
290 line_numbers: bool,
291 encoding_strategy: Option<&str>,
292 ts_config: &TreeSitterConfig,
293) -> io::Result<()> {
294 let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
295 info!("Processing file: {}", relative_path.display());
296
297 let metadata = match fs::metadata(file_path) {
298 Ok(meta) => meta,
299 Err(e) => {
300 error!(
301 "Failed to get metadata for {}: {}",
302 relative_path.display(),
303 e
304 );
305 return Ok(());
306 }
307 };
308
309 let modified_time = metadata
310 .modified()
311 .ok()
312 .map(|time| {
313 let system_time: chrono::DateTime<Utc> = time.into();
314 system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
315 })
316 .unwrap_or_else(|| "Unknown".to_string());
317
318 writeln!(output)?;
319 writeln!(output, "### File: `{}`", relative_path.display())?;
320
321 writeln!(output)?;
322
323 writeln!(output, "- Size: {} bytes", metadata.len())?;
324 writeln!(output, "- Modified: {}", modified_time)?;
325 writeln!(output)?;
326
327 let extension = file_path
329 .extension()
330 .and_then(|s| s.to_str())
331 .unwrap_or("text");
332 let language = match extension {
333 "rs" => "rust",
334 "js" => "javascript",
335 "ts" => "typescript",
336 "jsx" => "jsx",
337 "tsx" => "tsx",
338 "json" => "json",
339 "toml" => "toml",
340 "md" => "markdown",
341 "yaml" | "yml" => "yaml",
342 "html" => "html",
343 "css" => "css",
344 "py" => "python",
345 "java" => "java",
346 "cpp" => "cpp",
347 "c" => "c",
348 "h" => "c",
349 "hpp" => "cpp",
350 "sql" => "sql",
351 "sh" => "bash",
352 "xml" => "xml",
353 "lock" => "toml",
354 _ => extension,
355 };
356
357 match fs::File::open(file_path) {
359 Ok(mut file) => {
360 let mut sniff = [0u8; 8192];
361 let n = match file.read(&mut sniff) {
362 Ok(n) => n,
363 Err(e) => {
364 warn!(
365 "Could not read file {}: {}. Skipping content.",
366 relative_path.display(),
367 e
368 );
369
370 writeln!(output, "```text")?;
371
372 writeln!(
373 output,
374 "<Could not read file content (e.g., binary file or permission error)>"
375 )?;
376
377 writeln!(output, "```")?;
378
379 return Ok(());
380 }
381 };
382 let slice = &sniff[..n];
383
384 let check_len = if n == sniff.len() {
388 let mut end = n;
390 while end > 0 && end > n.saturating_sub(4) && sniff[end - 1] & 0xC0 == 0x80 {
391 end -= 1; }
393 if end > 0 && end < n {
395 let leading = sniff[end - 1];
396 let expected_len = if leading & 0xE0 == 0xC0 {
397 2
398 } else if leading & 0xF0 == 0xE0 {
399 3
400 } else if leading & 0xF8 == 0xF0 {
401 4
402 } else {
403 1
404 };
405 if end - 1 + expected_len > n {
406 end - 1 } else {
408 n
409 }
410 } else {
411 n
412 }
413 } else {
414 n };
416
417 let is_utf8 = std::str::from_utf8(&sniff[..check_len]).is_ok();
419
420 if is_utf8 && !slice.contains(&0) {
421 } else {
423 let (encoding, _consumed) =
426 encoding_rs::Encoding::for_bom(slice).unwrap_or((encoding_rs::UTF_8, 0));
427
428 let detected_encoding = if encoding == UTF_8 {
430 detect_text_encoding(slice)
432 } else {
433 Some(encoding)
434 };
435
436 match detected_encoding {
437 Some(enc) if enc != UTF_8 => {
438 let strategy = encoding_strategy.unwrap_or("detect");
439 match strategy {
440 "strict" | "skip" => {
441 warn!(
443 "Skipping non-UTF-8 file {} (encoding: {}, strategy: {})",
444 relative_path.display(),
445 enc.name(),
446 strategy
447 );
448 }
449 _ => {
450 match transcode_file_content(file_path, enc) {
452 Ok(transcoded_content) => {
453 info!(
454 "Successfully transcoded {} from {} to UTF-8",
455 relative_path.display(),
456 enc.name()
457 );
458 write_text_content(
459 output,
460 &transcoded_content,
461 language,
462 line_numbers,
463 )?;
464 return Ok(());
465 }
466 Err(e) => {
467 warn!(
468 "Failed to transcode {} from {}: {}. Treating as binary.",
469 relative_path.display(),
470 enc.name(),
471 e
472 );
473 }
474 }
475 }
476 }
477 }
478 _ => {
479 if slice.contains(&0) {
481 warn!(
482 "Detected binary file {} (contains null bytes). Skipping content.",
483 relative_path.display()
484 );
485 } else {
486 warn!(
487 "Could not determine encoding for {}. Treating as binary.",
488 relative_path.display()
489 );
490 }
491 }
492 }
493
494 writeln!(output, "```text")?;
496 writeln!(
497 output,
498 "<Binary file or unsupported encoding: {} bytes>",
499 metadata.len()
500 )?;
501 writeln!(output, "```")?;
502 return Ok(());
503 }
504
505 if let Err(e) = file.seek(SeekFrom::Start(0)) {
507 warn!(
508 "Could not reset file cursor for {}: {}. Skipping content.",
509 relative_path.display(),
510 e
511 );
512 writeln!(output, "```text")?;
513 writeln!(
514 output,
515 "<Could not read file content (e.g., binary file or permission error)>"
516 )?;
517 writeln!(output, "```")?;
518 return Ok(());
519 }
520
521 let content = match std::fs::read_to_string(file_path) {
523 Ok(content) => content,
524 Err(e) => {
525 warn!(
526 "Error reading file {}: {}. Output may be truncated.",
527 relative_path.display(),
528 e
529 );
530 writeln!(output, "```text")?;
531 writeln!(output, "<Error reading file content>")?;
532 writeln!(output, "```")?;
533 return Ok(());
534 }
535 };
536 let signatures_only = ts_config.signatures
540 && crate::tree_sitter::is_supported_extension(extension);
541
542 if !signatures_only {
543 write_text_content(output, &content, language, line_numbers)?;
548 }
549
550 write_tree_sitter_enrichment(output, &content, extension, ts_config)?;
552 }
553 Err(e) => {
554 warn!(
555 "Could not open file {}: {}. Skipping content.",
556 relative_path.display(),
557 e
558 );
559 writeln!(output, "```text")?;
560 writeln!(
561 output,
562 "<Could not read file content (e.g., binary file or permission error)>"
563 )?;
564 writeln!(output, "```")?;
565 }
566 }
567
568 Ok(())
569}
570
571#[allow(unused_variables)]
573pub fn write_tree_sitter_enrichment(
574 output: &mut impl Write,
575 content: &str,
576 extension: &str,
577 ts_config: &TreeSitterConfig,
578) -> io::Result<()> {
579 if !ts_config.signatures && !ts_config.structure {
580 return Ok(());
581 }
582
583 #[cfg(feature = "tree-sitter-base")]
584 {
585 use crate::tree_sitter::language_support::Visibility;
586
587 let vis_filter: Visibility = ts_config.visibility.parse().unwrap_or(Visibility::All);
588
589 if ts_config.structure
590 && let Some(structure) =
591 crate::tree_sitter::extract_structure_for_file(content, extension)
592 {
593 let summary = crate::tree_sitter::structure::format_structure_as_markdown(&structure);
594 if !summary.is_empty() {
595 writeln!(output)?;
596 write!(output, "{}", summary)?;
597 }
598 }
599
600 if ts_config.signatures
601 && let Some(signatures) =
602 crate::tree_sitter::extract_signatures_for_file(content, extension, vis_filter)
603 && !signatures.is_empty()
604 {
605 let language = match extension {
606 "rs" => "rust",
607 "js" | "mjs" | "cjs" => "javascript",
608 "ts" | "tsx" | "mts" | "cts" => "typescript",
609 "py" | "pyw" => "python",
610 "go" => "go",
611 "java" => "java",
612 "c" | "h" => "c",
613 "cpp" | "cxx" | "cc" | "hpp" | "hxx" | "hh" => "cpp",
614 _ => extension,
615 };
616 writeln!(output)?;
617 writeln!(output, "**Signatures:**")?;
618 writeln!(output)?;
619 let formatted = crate::tree_sitter::signatures::format_signatures_as_markdown(
620 &signatures,
621 language,
622 );
623 write!(output, "{}", formatted)?;
624 }
625 }
626
627 #[cfg(not(feature = "tree-sitter-base"))]
628 {
629 }
632
633 Ok(())
634}
635
636fn detect_text_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
638 let encodings = [
640 encoding_rs::WINDOWS_1252,
641 encoding_rs::UTF_16LE,
642 encoding_rs::UTF_16BE,
643 encoding_rs::SHIFT_JIS,
644 ];
645
646 for encoding in &encodings {
647 let (decoded, _, had_errors) = encoding.decode(bytes);
648 if !had_errors && is_likely_text(&decoded) {
649 return Some(encoding);
650 }
651 }
652
653 None
654}
655
/// Heuristically decides whether `content` looks like human-readable text.
///
/// Control characters other than newline, carriage return and tab count as
/// suspicious. The content is rejected when more than 5% of its characters are
/// suspicious; once more than 100 characters have been seen the same check is
/// applied to the prefix read so far, so heavily polluted input is rejected early.
/// Empty input is considered text.
fn is_likely_text(content: &str) -> bool {
    let mut suspicious = 0usize;
    let mut seen = 0usize;

    for (position, ch) in content.chars().enumerate() {
        seen = position + 1;
        if ch.is_control() && !matches!(ch, '\n' | '\r' | '\t') {
            suspicious += 1;
        }

        // Early exit: the prefix alone is already above the 5% threshold.
        if seen > 100 && suspicious * 20 > seen {
            return false;
        }
    }

    seen == 0 || suspicious * 20 <= seen
}
680
681fn transcode_file_content(file_path: &Path, encoding: &'static Encoding) -> io::Result<String> {
683 let bytes = std::fs::read(file_path)?;
684 let (decoded, _, had_errors) = encoding.decode(&bytes);
685
686 if had_errors {
687 return Err(io::Error::new(
688 io::ErrorKind::InvalidData,
689 format!("Failed to decode file with encoding {}", encoding.name()),
690 ));
691 }
692
693 Ok(decoded.into_owned())
694}
695
/// Writes `content` as a fenced Markdown code block tagged with `language`.
///
/// With `line_numbers` every line is prefixed by its right-aligned 1-based number
/// and ` | `. Without it the content is written verbatim and a trailing newline is
/// added if missing, so the closing fence always starts on its own line.
///
/// The fence is normally three backticks. When unnumbered content contains a line
/// that itself starts with three or more backticks (for example a README with code
/// blocks), the fence is made one backtick longer than the longest such run so the
/// embedded fences cannot terminate the block early.
///
/// # Errors
///
/// Returns an error when writing to `output` fails.
fn write_text_content(
    output: &mut impl Write,
    content: &str,
    language: &str,
    line_numbers: bool,
) -> io::Result<()> {
    // Numbered lines always start with the number prefix, so they can never be
    // mistaken for a closing fence and the default fence is sufficient.
    let fence = if line_numbers {
        "```".to_string()
    } else {
        fence_enclosing(content)
    };

    writeln!(output, "{}{}", fence, language)?;

    if line_numbers {
        for (i, line) in content.lines().enumerate() {
            writeln!(output, "{:>4} | {}", i + 1, line)?;
        }
    } else {
        output.write_all(content.as_bytes())?;
        if !content.ends_with('\n') {
            writeln!(output)?;
        }
    }

    writeln!(output, "{}", fence)?;
    Ok(())
}

/// Returns a backtick fence (at least three long) that is longer than any backtick
/// run opening a line of `content`.
fn fence_enclosing(content: &str) -> String {
    let longest_run = content
        .lines()
        .map(|line| {
            line.trim_start()
                .bytes()
                .take_while(|&byte| byte == b'`')
                .count()
        })
        .max()
        .unwrap_or(0);

    "`".repeat(longest_run.max(2) + 1)
}
719
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `bytes` to `name` in a fresh temp dir, runs `process_file` on it with
    /// the default tree-sitter config and returns the rendered Markdown.
    fn render_file(
        name: &str,
        bytes: &[u8],
        line_numbers: bool,
        encoding_strategy: Option<&str>,
    ) -> String {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join(name);
        fs::write(&file_path, bytes).unwrap();

        let mut rendered = Vec::new();
        process_file(
            dir.path(),
            &file_path,
            &mut rendered,
            line_numbers,
            encoding_strategy,
            &TreeSitterConfig::default(),
        )
        .unwrap();

        String::from_utf8(rendered).unwrap()
    }

    /// Asserts that `markdown` contains at least an opening and a closing fence.
    fn assert_has_fence_pair(markdown: &str) {
        let fence_count = markdown.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    /// Switches the process working directory and restores the previous one on drop,
    /// so a panicking test cannot leave other tests in the wrong directory.
    struct CwdGuard {
        previous: std::path::PathBuf,
    }

    impl CwdGuard {
        fn enter(target: &Path) -> Self {
            let previous = std::env::current_dir().unwrap();
            std::env::set_current_dir(target).unwrap();
            Self { previous }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            let _ = std::env::set_current_dir(&self.previous);
        }
    }

    #[test]
    fn test_code_block_formatting() {
        let content = render_file(
            "test.rs",
            b"fn main() {\n println!(\"Hello, world!\");\n}",
            false,
            None,
        );

        assert!(content.contains("```rust"));
        assert_has_fence_pair(&content);
    }

    #[test]
    fn test_markdown_file_formatting() {
        let content = render_file(
            "README.md",
            b"# Test\n\nThis is a test markdown file.",
            false,
            None,
        );

        assert!(
            content.contains("```markdown"),
            "Content should contain '```markdown' but was: {}",
            content
        );
        let code_block_markers = content.matches("```").count();
        assert!(
            code_block_markers >= 2,
            "Expected at least 2 code block markers, found {}",
            code_block_markers
        );
    }

    #[test]
    fn test_line_numbered_code_blocks() {
        let source = "fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\nfn main() {\n println!(\"{}\", add(1, 2));\n}\n";
        let content = render_file("lib.rs", source.as_bytes(), true, None);

        assert!(content.contains("```rust"));
        assert!(content.contains(" 1 | "));
        assert!(content.contains(" 2 | "));

        // Every source line must appear exactly once with a number prefix.
        let numbered_lines = content
            .lines()
            .filter(|l| {
                l.trim_start()
                    .chars()
                    .next()
                    .map(|c| c.is_ascii_digit())
                    .unwrap_or(false)
                    && l.contains(" | ")
            })
            .count();
        assert_eq!(numbered_lines, source.lines().count());
    }

    #[test]
    fn test_binary_file_handling() {
        // PNG signature followed by NUL-heavy data.
        let bytes = [
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48,
            0x44, 0x52, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00,
        ];
        let content = render_file("image.bin", &bytes, false, None);

        assert!(content.contains("```text"));
        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert_has_fence_pair(&content);
    }

    #[test]
    fn test_encoding_detection_and_transcoding() {
        // "Hello “World”\n" in Windows-1252.
        let windows1252_content = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, 0x0A,
        ];
        let content = render_file(
            "windows1252.txt",
            &windows1252_content,
            false,
            Some("detect"),
        );

        assert!(content.contains("Hello"));
        assert!(content.contains("World"));
        assert!(content.contains("```txt"));
        assert_has_fence_pair(&content);
    }

    #[test]
    fn test_encoding_strategy_strict() {
        // UTF-16LE BOM followed by "A".
        let content = render_file("utf16.txt", &[0xFF, 0xFE, 0x41, 0x00], false, Some("strict"));

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));
        assert_has_fence_pair(&content);
    }

    #[test]
    fn test_encoding_strategy_skip() {
        // UTF-16LE BOM followed by "Hi".
        let content = render_file(
            "utf16.txt",
            &[0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00],
            false,
            Some("skip"),
        );

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));
    }

    #[test]
    fn test_generate_markdown_with_current_directory() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("test.md");

        fs::write(base_path.join("readme.txt"), "Hello world").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = {
            // Restored when the guard drops, even if `generate_markdown` panics.
            let _cwd = CwdGuard::enter(base_path);
            generate_markdown(
                &output_path.to_string_lossy(),
                ".",
                &[],
                &[],
                &file_tree,
                &files,
                base_path,
                false,
                None,
                None,
                &TreeSitterConfig::default(),
            )
        };

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
    }

    #[test]
    fn test_generate_markdown_creates_output_directory() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nested_output = base_path.join("nested").join("deep").join("output.md");

        fs::write(base_path.join("test.txt"), "content").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &nested_output.to_string_lossy(),
            "test_dir",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
            None,
            &TreeSitterConfig::default(),
        );

        assert!(result.is_ok());
        assert!(nested_output.exists());
        assert!(nested_output.parent().unwrap().exists());
    }

    #[test]
    fn test_generate_markdown_with_filters_and_ignores() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("filtered.md");

        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
        fs::write(base_path.join("config.toml"), "[package]").unwrap();
        fs::write(base_path.join("readme.md"), "# README").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            "project",
            &["rs".to_string(), "toml".to_string()],
            &["readme.md".to_string()],
            &file_tree,
            &files,
            base_path,
            true,
            Some("strict"),
            None,
            &TreeSitterConfig::default(),
        );

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
        assert!(content.contains("main.rs") || content.contains("config.toml"));
    }

    #[test]
    fn test_write_text_content_with_line_numbers() {
        let mut output = Vec::new();
        let content = "line one\nline two\nline three";

        write_text_content(&mut output, content, "rust", true).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```rust"));
        assert!(result.contains(" 1 | line one"));
        assert!(result.contains(" 2 | line two"));
        assert!(result.contains(" 3 | line three"));
        assert!(result.ends_with("```\n"));
    }

    #[test]
    fn test_write_text_content_without_line_numbers() {
        let mut output = Vec::new();
        let content = "function test() {\n return true;\n}";

        write_text_content(&mut output, content, "javascript", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```javascript"));
        assert!(result.contains("function test() {"));
        assert!(result.contains(" return true;"));
        assert!(result.ends_with("```\n"));
        // No line-number separator may appear in unnumbered output.
        assert!(!result.contains(" | "));
    }

    #[test]
    fn test_write_text_content_without_trailing_newline() {
        let mut output = Vec::new();
        let content = "no newline at end";

        write_text_content(&mut output, content, "text", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```text"));
        assert!(result.contains("no newline at end"));
        // The closing fence must sit on its own line.
        assert!(result.ends_with("```\n"));
    }

    #[test]
    fn test_is_likely_text() {
        assert!(is_likely_text("Hello world\nThis is normal text"));
        assert!(is_likely_text(
            "Line 1\nLine 2\tTabbed\r\nWindows line ending"
        ));

        // 20% control characters is far above the accepted ratio.
        let bad_text: String = (0..200)
            .map(|i| if i % 5 == 0 { '\x01' } else { 'a' })
            .collect();
        assert!(!is_likely_text(&bad_text));

        assert!(is_likely_text(""));
    }

    #[test]
    fn test_detect_text_encoding() {
        // Plain ASCII decodes cleanly under the first candidate, so something is detected.
        let ascii = detect_text_encoding("Hello world".as_bytes());
        assert!(ascii.is_some());

        // "Hello “world”" in Windows-1252.
        let windows1252_bytes = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x94,
        ];
        let detected = detect_text_encoding(&windows1252_bytes);
        assert!(detected == Some(encoding_rs::WINDOWS_1252));
    }

    #[test]
    fn test_transcode_file_content() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("windows1252.txt");

        let windows1252_content = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94,
        ];
        fs::write(&file_path, windows1252_content).unwrap();

        let transcoded = transcode_file_content(&file_path, encoding_rs::WINDOWS_1252)
            .expect("Windows-1252 input should transcode");
        assert!(transcoded.contains("Hello"));
        assert!(transcoded.contains("World"));
    }

    #[test]
    fn test_process_file_with_metadata_error() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nonexistent_file = base_path.join("nonexistent.txt");

        let mut rendered = Vec::new();
        let result = process_file(
            base_path,
            &nonexistent_file,
            &mut rendered,
            false,
            None,
            &TreeSitterConfig::default(),
        );
        assert!(result.is_ok());

        // A file without readable metadata is skipped without any output.
        let content = String::from_utf8(rendered).unwrap();
        assert!(content.trim().is_empty());
    }

    #[test]
    fn test_process_file_with_different_extensions() {
        let test_files = [
            ("script.py", "print('hello')", "python"),
            ("data.json", r#"{"key": "value"}"#, "json"),
            ("config.yaml", "key: value", "yaml"),
            ("style.css", "body { margin: 0; }", "css"),
            ("page.html", "<html><body>Test</body></html>", "html"),
            ("query.sql", "SELECT * FROM users;", "sql"),
            ("build.sh", "#!/bin/bash\necho 'building'", "bash"),
            ("unknown.xyz", "unknown content", "xyz"),
        ];

        for (filename, content, expected_lang) in test_files.iter() {
            let result = render_file(filename, content.as_bytes(), false, None);

            assert!(result.contains(&format!("```{}", expected_lang)));
            assert!(result.contains(content));
            assert!(result.contains(filename));
        }
    }
}