1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9use encoding_rs::{Encoding, UTF_8};
10
11#[cfg(feature = "parallel")]
12use crossbeam_channel::{Receiver, Sender, bounded};
13#[cfg(feature = "parallel")]
14use std::thread;
15
16#[allow(clippy::too_many_arguments)]
18pub fn generate_markdown(
19 output_path: &str,
20 input_dir: &str,
21 filters: &[String],
22 ignores: &[String],
23 file_tree: &FileTree,
24 files: &[DirEntry],
25 base_path: &Path,
26 line_numbers: bool,
27 encoding_strategy: Option<&str>,
28) -> io::Result<()> {
29 if let Some(parent) = Path::new(output_path).parent()
30 && !parent.exists()
31 {
32 fs::create_dir_all(parent)?;
33 }
34
35 let mut output = fs::File::create(output_path)?;
36
37 let input_dir_name = if input_dir == "." {
38 let current_dir = std::env::current_dir()?;
39 current_dir
40 .file_name()
41 .unwrap()
42 .to_str()
43 .unwrap()
44 .to_string()
45 } else {
46 input_dir.to_string()
47 };
48
49 writeln!(output, "# Directory Structure Report\n")?;
51
52 if !filters.is_empty() {
53 writeln!(
54 output,
55 "This document contains files from the `{}` directory with extensions: {}",
56 input_dir_name,
57 filters.join(", ")
58 )?;
59 } else {
60 writeln!(
61 output,
62 "This document contains all files from the `{}` directory, optimized for LLM consumption.",
63 input_dir_name
64 )?;
65 }
66
67 if !ignores.is_empty() {
68 writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
69 }
70
71 writeln!(
72 output,
73 "Processed at: {}",
74 Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
75 )?;
76 writeln!(output)?;
77
78 writeln!(output, "## File Tree Structure\n")?;
81
82 write_tree_to_file(&mut output, file_tree, 0)?;
83
84 writeln!(output)?;
85
86 #[cfg(feature = "parallel")]
90 {
91 use rayon::prelude::*;
92
93 type ChunkResult = (usize, io::Result<Vec<u8>>);
95 let (sender, receiver): (Sender<ChunkResult>, Receiver<ChunkResult>) =
96 bounded(num_cpus::get() * 2); let writer_handle = {
99 let mut output = output;
100 let total_files = files.len();
101
102 thread::spawn(move || -> io::Result<()> {
103 let mut completed_chunks = std::collections::BTreeMap::new();
104 let mut next_index = 0;
105 let mut errors = Vec::new();
106
107 while next_index < total_files {
109 match receiver.recv() {
110 Ok((index, chunk_result)) => {
111 completed_chunks.insert(index, chunk_result);
112
113 while let Some(chunk_result) = completed_chunks.remove(&next_index) {
115 match chunk_result {
116 Ok(buf) => {
117 if let Err(e) = output.write_all(&buf) {
118 errors.push(format!(
119 "Failed to write output for file index {}: {}",
120 next_index, e
121 ));
122 }
123 }
124 Err(e) => {
125 errors.push(format!(
126 "Failed to process file index {}: {}",
127 next_index, e
128 ));
129 }
130 }
131 next_index += 1;
132 }
133 }
134 Err(_) => break, }
136 }
137
138 if !errors.is_empty() {
139 error!(
140 "Encountered {} errors during parallel processing:",
141 errors.len()
142 );
143 for err in &errors {
144 error!(" {}", err);
145 }
146 return Err(std::io::Error::other(format!(
147 "Failed to process {} files: {}",
148 errors.len(),
149 errors.join("; ")
150 )));
151 }
152
153 Ok(())
154 })
155 };
156
157 files.par_iter().enumerate().for_each(|(index, entry)| {
159 let mut buf = Vec::new();
160 let result = process_file(
161 base_path,
162 entry.path(),
163 &mut buf,
164 line_numbers,
165 encoding_strategy,
166 )
167 .map(|_| buf);
168
169 let _ = sender.send((index, result));
171 });
172
173 drop(sender);
175
176 writer_handle
178 .join()
179 .map_err(|_| std::io::Error::other("Writer thread panicked"))??;
180 }
181
182 #[cfg(not(feature = "parallel"))]
183 {
184 for entry in files {
185 process_file(
186 base_path,
187 entry.path(),
188 &mut output,
189 line_numbers,
190 encoding_strategy,
191 )?;
192 }
193 }
194
195 Ok(())
196}
197
198pub fn process_file(
200 base_path: &Path,
201
202 file_path: &Path,
203
204 output: &mut impl Write,
205 line_numbers: bool,
206 encoding_strategy: Option<&str>,
207) -> io::Result<()> {
208 let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
209 info!("Processing file: {}", relative_path.display());
210
211 let metadata = match fs::metadata(file_path) {
212 Ok(meta) => meta,
213 Err(e) => {
214 error!(
215 "Failed to get metadata for {}: {}",
216 relative_path.display(),
217 e
218 );
219 return Ok(());
220 }
221 };
222
223 let modified_time = metadata
224 .modified()
225 .ok()
226 .map(|time| {
227 let system_time: chrono::DateTime<Utc> = time.into();
228 system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
229 })
230 .unwrap_or_else(|| "Unknown".to_string());
231
232 writeln!(output)?;
233 writeln!(output, "### File: `{}`", relative_path.display())?;
234
235 writeln!(output)?;
236
237 writeln!(output, "- Size: {} bytes", metadata.len())?;
238 writeln!(output, "- Modified: {}", modified_time)?;
239 writeln!(output)?;
240
241 let extension = file_path
243 .extension()
244 .and_then(|s| s.to_str())
245 .unwrap_or("text");
246 let language = match extension {
247 "rs" => "rust",
248 "js" => "javascript",
249 "ts" => "typescript",
250 "jsx" => "jsx",
251 "tsx" => "tsx",
252 "json" => "json",
253 "toml" => "toml",
254 "md" => "markdown",
255 "yaml" | "yml" => "yaml",
256 "html" => "html",
257 "css" => "css",
258 "py" => "python",
259 "java" => "java",
260 "cpp" => "cpp",
261 "c" => "c",
262 "h" => "c",
263 "hpp" => "cpp",
264 "sql" => "sql",
265 "sh" => "bash",
266 "xml" => "xml",
267 "lock" => "toml",
268 _ => extension,
269 };
270
271 match fs::File::open(file_path) {
273 Ok(mut file) => {
274 let mut sniff = [0u8; 8192];
275 let n = match file.read(&mut sniff) {
276 Ok(n) => n,
277 Err(e) => {
278 warn!(
279 "Could not read file {}: {}. Skipping content.",
280 relative_path.display(),
281 e
282 );
283
284 writeln!(output, "```text")?;
285
286 writeln!(
287 output,
288 "<Could not read file content (e.g., binary file or permission error)>"
289 )?;
290
291 writeln!(output, "```")?;
292
293 return Ok(());
294 }
295 };
296 let slice = &sniff[..n];
297
298 let is_utf8 = std::str::from_utf8(slice).is_ok();
300
301 if is_utf8 && !slice.contains(&0) {
302 } else {
304 let (encoding, _consumed) =
307 encoding_rs::Encoding::for_bom(slice).unwrap_or((encoding_rs::UTF_8, 0));
308
309 let detected_encoding = if encoding == UTF_8 {
311 detect_text_encoding(slice)
313 } else {
314 Some(encoding)
315 };
316
317 match detected_encoding {
318 Some(enc) if enc != UTF_8 => {
319 let strategy = encoding_strategy.unwrap_or("detect");
320 match strategy {
321 "strict" | "skip" => {
322 warn!(
324 "Skipping non-UTF-8 file {} (encoding: {}, strategy: {})",
325 relative_path.display(),
326 enc.name(),
327 strategy
328 );
329 }
330 _ => {
331 match transcode_file_content(file_path, enc) {
333 Ok(transcoded_content) => {
334 info!(
335 "Successfully transcoded {} from {} to UTF-8",
336 relative_path.display(),
337 enc.name()
338 );
339 write_text_content(
340 output,
341 &transcoded_content,
342 language,
343 line_numbers,
344 )?;
345 return Ok(());
346 }
347 Err(e) => {
348 warn!(
349 "Failed to transcode {} from {}: {}. Treating as binary.",
350 relative_path.display(),
351 enc.name(),
352 e
353 );
354 }
355 }
356 }
357 }
358 }
359 _ => {
360 if slice.contains(&0) {
362 warn!(
363 "Detected binary file {} (contains null bytes). Skipping content.",
364 relative_path.display()
365 );
366 } else {
367 warn!(
368 "Could not determine encoding for {}. Treating as binary.",
369 relative_path.display()
370 );
371 }
372 }
373 }
374
375 writeln!(output, "```text")?;
377 writeln!(
378 output,
379 "<Binary file or unsupported encoding: {} bytes>",
380 metadata.len()
381 )?;
382 writeln!(output, "```")?;
383 return Ok(());
384 }
385
386 if let Err(e) = file.seek(SeekFrom::Start(0)) {
388 warn!(
389 "Could not reset file cursor for {}: {}. Skipping content.",
390 relative_path.display(),
391 e
392 );
393 writeln!(output, "```text")?;
394 writeln!(
395 output,
396 "<Could not read file content (e.g., binary file or permission error)>"
397 )?;
398 writeln!(output, "```")?;
399 return Ok(());
400 }
401
402 if let Err(e) = file.seek(SeekFrom::Start(0)) {
404 warn!(
405 "Could not reset file cursor for {}: {}. Skipping content.",
406 relative_path.display(),
407 e
408 );
409 writeln!(output, "```text")?;
410 writeln!(
411 output,
412 "<Could not read file content (e.g., binary file or permission error)>"
413 )?;
414 writeln!(output, "```")?;
415 return Ok(());
416 }
417
418 let content = match std::fs::read_to_string(file_path) {
419 Ok(content) => content,
420 Err(e) => {
421 warn!(
422 "Error reading file {}: {}. Output may be truncated.",
423 relative_path.display(),
424 e
425 );
426 writeln!(output, "```text")?;
427 writeln!(output, "<Error reading file content>")?;
428 writeln!(output, "```")?;
429 return Ok(());
430 }
431 };
432
433 write_text_content(output, &content, language, line_numbers)?;
434 }
435 Err(e) => {
436 warn!(
437 "Could not open file {}: {}. Skipping content.",
438 relative_path.display(),
439 e
440 );
441 writeln!(output, "```text")?;
442 writeln!(
443 output,
444 "<Could not read file content (e.g., binary file or permission error)>"
445 )?;
446 writeln!(output, "```")?;
447 }
448 }
449
450 Ok(())
451}
452
453fn detect_text_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
455 let encodings = [
457 encoding_rs::WINDOWS_1252,
458 encoding_rs::UTF_16LE,
459 encoding_rs::UTF_16BE,
460 encoding_rs::SHIFT_JIS,
461 ];
462
463 for encoding in &encodings {
464 let (decoded, _, had_errors) = encoding.decode(bytes);
465 if !had_errors && is_likely_text(&decoded) {
466 return Some(encoding);
467 }
468 }
469
470 None
471}
472
/// Heuristically decides whether `content` looks like human-readable text.
///
/// Control characters other than `\n`, `\r` and `\t` count as suspicious. The
/// text is rejected when they make up more than 5% of the characters seen; the
/// ratio is checked after every character once more than 100 have been
/// scanned, and once more at the end. An empty string counts as text.
fn is_likely_text(content: &str) -> bool {
    let mut seen = 0;
    let mut suspicious = 0;

    for ch in content.chars() {
        seen += 1;

        let is_layout_whitespace = matches!(ch, '\n' | '\r' | '\t');
        if ch.is_control() && !is_layout_whitespace {
            suspicious += 1;
        }

        // Bail out early on obviously binary input instead of scanning it all.
        if seen > 100 && suspicious * 20 > seen {
            return false;
        }
    }

    seen == 0 || suspicious * 20 <= seen
}
497
498fn transcode_file_content(file_path: &Path, encoding: &'static Encoding) -> io::Result<String> {
500 let bytes = std::fs::read(file_path)?;
501 let (decoded, _, had_errors) = encoding.decode(&bytes);
502
503 if had_errors {
504 return Err(io::Error::new(
505 io::ErrorKind::InvalidData,
506 format!("Failed to decode file with encoding {}", encoding.name()),
507 ));
508 }
509
510 Ok(decoded.into_owned())
511}
512
/// Writes `content` as a fenced Markdown code block tagged with `language`.
///
/// With `line_numbers` set, every line is emitted as `{number:>4} | {line}`,
/// numbered from 1. Otherwise the content is written verbatim and a newline is
/// appended if it does not already end with one, so the closing fence always
/// starts on its own line.
///
/// # Errors
///
/// Propagates any error returned by `output`.
fn write_text_content(
    output: &mut impl Write,
    content: &str,
    language: &str,
    line_numbers: bool,
) -> io::Result<()> {
    writeln!(output, "```{}", language)?;

    if !line_numbers {
        output.write_all(content.as_bytes())?;
        if !content.ends_with('\n') {
            writeln!(output)?;
        }
        return writeln!(output, "```");
    }

    for (number, text) in (1usize..).zip(content.lines()) {
        writeln!(output, "{:>4} | {}", number, text)?;
    }

    writeln!(output, "```")
}
536
/// Unit tests for report generation, per-file rendering and the encoding
/// helpers. Every test works inside its own temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// A Rust source file is wrapped in a fenced block tagged `rust`.
    #[test]
    fn test_code_block_formatting() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("test.rs");
        let output_path = base_path.join("output.md");

        fs::write(
            &file_path,
            "fn main() {\n println!(\"Hello, world!\");\n}",
        )
        .unwrap();

        let mut output = fs::File::create(&output_path).unwrap();

        process_file(base_path, &file_path, &mut output, false, None).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("```rust"));
        assert!(content.contains("```") && content.matches("```").count() >= 2);
    }

    /// A `.md` file is wrapped in a fenced block tagged `markdown`.
    #[test]
    fn test_markdown_file_formatting() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("README.md");
        let output_path = base_path.join("output.md");

        fs::write(&file_path, "# Test\n\nThis is a test markdown file.").unwrap();

        let mut output = fs::File::create(&output_path).unwrap();

        process_file(base_path, &file_path, &mut output, false, None).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        println!("Generated content:\n{}", content);

        assert!(
            content.contains("```markdown"),
            "Content should contain '```markdown' but was: {}",
            content
        );
        let code_block_markers = content.matches("```").count();

        assert!(
            code_block_markers >= 2,
            "Expected at least 2 code block markers, found {}",
            code_block_markers
        );
    }

    /// With line numbers enabled, every source line gets exactly one prefix.
    #[test]
    fn test_line_numbered_code_blocks() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("lib.rs");
        let output_path = base_path.join("out.md");

        fs::write(
            &file_path,
            "fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\nfn main() {\n println!(\"{}\", add(1, 2));\n}\n",
        )
        .unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, true, None).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("```rust"));
        assert!(content.contains(" 1 | "));
        assert!(content.contains(" 2 | "));

        // Count output lines that look like "<digits> | ..." and compare with
        // the number of lines in the input file.
        let numbered_lines = content
            .lines()
            .filter(|l| {
                l.trim_start()
                    .chars()
                    .next()
                    .map(|c| c.is_ascii_digit())
                    .unwrap_or(false)
                    && l.contains(" | ")
            })
            .count();
        let original_line_count = fs::read_to_string(&file_path).unwrap().lines().count();
        assert_eq!(numbered_lines, original_line_count);

        assert!(content.contains("```"));
    }

    /// Binary content is replaced by a placeholder inside a `text` fence.
    #[test]
    fn test_binary_file_handling() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("image.bin");
        let output_path = base_path.join("out.md");

        // PNG signature and IHDR chunk start, followed by low bytes and NULs.
        let bytes = vec![
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, //
            0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, //
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, //
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];
        fs::write(&file_path, bytes).unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, false, None).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("```text"));
        assert!(content.contains("<Binary file or unsupported encoding:"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    /// Windows-1252 text is transcoded when the strategy is `detect`.
    #[test]
    fn test_encoding_detection_and_transcoding() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("out.md");

        // "Hello “World”\n" with Windows-1252 curly quotes (0x93 / 0x94).
        let windows1252_content = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, 0x0A,
        ];
        let file_path = base_path.join("windows1252.txt");
        fs::write(&file_path, windows1252_content).unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, false, Some("detect")).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("Hello"));
        assert!(content.contains("World"));
        assert!(content.contains("```txt"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    /// Strategy `strict` never transcodes; non-UTF-8 files get the placeholder.
    #[test]
    fn test_encoding_strategy_strict() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("out.md");

        // UTF-16LE BOM followed by "A".
        let non_utf8_content = [0xFF, 0xFE, 0x41, 0x00];
        let file_path = base_path.join("utf16.txt");
        fs::write(&file_path, non_utf8_content).unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, false, Some("strict")).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    /// Strategy `skip` behaves like `strict` for non-UTF-8 files.
    #[test]
    fn test_encoding_strategy_skip() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("out.md");

        // UTF-16LE BOM followed by "Hi".
        let utf16_content = [0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00];
        let file_path = base_path.join("utf16.txt");
        fs::write(&file_path, utf16_content).unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, false, Some("skip")).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));
    }

    /// `input_dir == "."` is resolved through the current working directory.
    #[test]
    fn test_generate_markdown_with_current_directory() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("test.md");

        fs::write(base_path.join("readme.txt"), "Hello world").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        // The working directory is process-wide state: restore it before any
        // assertion can panic.
        let original_dir = std::env::current_dir().unwrap();
        std::env::set_current_dir(base_path).unwrap();

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            ".",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
        );

        std::env::set_current_dir(original_dir).unwrap();

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
    }

    /// Missing parent directories of the output file are created.
    #[test]
    fn test_generate_markdown_creates_output_directory() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nested_output = base_path.join("nested").join("deep").join("output.md");

        fs::write(base_path.join("test.txt"), "content").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &nested_output.to_string_lossy(),
            "test_dir",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
        );

        assert!(result.is_ok());
        assert!(nested_output.exists());
        assert!(nested_output.parent().unwrap().exists());
    }

    /// Filters and ignores passed to `generate_markdown` do not break output.
    #[test]
    fn test_generate_markdown_with_filters_and_ignores() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("filtered.md");

        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
        fs::write(base_path.join("config.toml"), "[package]").unwrap();
        fs::write(base_path.join("readme.md"), "# README").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            "project",
            &["rs".to_string(), "toml".to_string()],
            &["readme.md".to_string()],
            &file_tree,
            &files,
            base_path,
            true,
            Some("strict"),
        );

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
        assert!(content.contains("main.rs") || content.contains("config.toml"));
    }

    #[test]
    fn test_write_text_content_with_line_numbers() {
        let mut output = Vec::new();
        let content = "line one\nline two\nline three";

        write_text_content(&mut output, content, "rust", true).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```rust"));
        assert!(result.contains(" 1 | line one"));
        assert!(result.contains(" 2 | line two"));
        assert!(result.contains(" 3 | line three"));
        assert!(result.contains("```"));
    }

    #[test]
    fn test_write_text_content_without_line_numbers() {
        let mut output = Vec::new();
        let content = "function test() {\n return true;\n}";

        write_text_content(&mut output, content, "javascript", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```javascript"));
        assert!(result.contains("function test() {"));
        assert!(result.contains(" return true;"));
        assert!(result.contains("```"));
        // No line-number gutter may appear in this mode.
        assert!(!result.contains(" | "));
    }

    #[test]
    fn test_write_text_content_without_trailing_newline() {
        let mut output = Vec::new();
        let content = "no newline at end";
        write_text_content(&mut output, content, "text", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```text"));
        assert!(result.contains("no newline at end"));
        // The closing fence must still sit on its own line.
        assert!(result.ends_with("```\n"));
    }

    #[test]
    fn test_is_likely_text() {
        assert!(is_likely_text("Hello world\nThis is normal text"));

        assert!(is_likely_text(
            "Line 1\nLine 2\tTabbed\r\nWindows line ending"
        ));

        // 20% control characters is well above the 5% threshold.
        let mut bad_text = String::new();
        for i in 0..200 {
            if i % 5 == 0 {
                bad_text.push('\x01');
            } else {
                bad_text.push('a');
            }
        }
        assert!(!is_likely_text(&bad_text));

        assert!(is_likely_text(""));
    }

    #[test]
    fn test_detect_text_encoding() {
        // Plain ASCII decodes cleanly under at least one candidate, so the
        // helper reports an encoding. (Callers only consult it for input that
        // already failed the UTF-8 check.)
        let utf8_bytes = "Hello world".as_bytes();
        let result = detect_text_encoding(utf8_bytes);
        assert!(
            result.is_some(),
            "plain ASCII should be accepted by a candidate encoding"
        );

        let windows1252_bytes = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x94,
        ];
        let detected = detect_text_encoding(&windows1252_bytes);
        assert!(detected.is_some());
    }

    #[test]
    fn test_transcode_file_content() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("windows1252.txt");

        let windows1252_content = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94,
        ];
        fs::write(&file_path, windows1252_content).unwrap();

        let result = transcode_file_content(&file_path, encoding_rs::WINDOWS_1252);
        assert!(result.is_ok());

        let transcoded = result.unwrap();
        assert!(transcoded.contains("Hello"));
        assert!(transcoded.contains("World"));
    }

    /// A file whose metadata cannot be read is skipped without any output.
    #[test]
    fn test_process_file_with_metadata_error() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nonexistent_file = base_path.join("nonexistent.txt");
        let output_path = base_path.join("output.md");

        let mut output = fs::File::create(&output_path).unwrap();

        let result = process_file(base_path, &nonexistent_file, &mut output, false, None);
        assert!(result.is_ok());

        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.is_empty() || content.trim().is_empty());
    }

    /// Known extensions map to their fence language; unknown ones pass through.
    #[test]
    fn test_process_file_with_different_extensions() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("output.md");

        let test_files = [
            ("script.py", "print('hello')", "python"),
            ("data.json", r#"{"key": "value"}"#, "json"),
            ("config.yaml", "key: value", "yaml"),
            ("style.css", "body { margin: 0; }", "css"),
            ("page.html", "<html><body>Test</body></html>", "html"),
            ("query.sql", "SELECT * FROM users;", "sql"),
            ("build.sh", "#!/bin/bash\necho 'building'", "bash"),
            ("unknown.xyz", "unknown content", "xyz"),
        ];

        for (filename, content, expected_lang) in test_files.iter() {
            let file_path = base_path.join(filename);
            fs::write(&file_path, content).unwrap();

            // Re-creating the output file truncates it for each case.
            let mut output = fs::File::create(&output_path).unwrap();
            process_file(base_path, &file_path, &mut output, false, None).unwrap();

            let result = fs::read_to_string(&output_path).unwrap();
            assert!(result.contains(&format!("```{}", expected_lang)));
            assert!(result.contains(content));
            assert!(result.contains(filename));
        }
    }
}