1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9use encoding_rs::{Encoding, UTF_8};
10
11#[cfg(feature = "parallel")]
12use crossbeam_channel::{Receiver, Sender, bounded};
13#[cfg(feature = "parallel")]
14use std::thread;
15
16#[allow(clippy::too_many_arguments)]
18pub fn generate_markdown(
19 output_path: &str,
20 input_dir: &str,
21 filters: &[String],
22 ignores: &[String],
23 file_tree: &FileTree,
24 files: &[DirEntry],
25 base_path: &Path,
26 line_numbers: bool,
27 encoding_strategy: Option<&str>,
28) -> io::Result<()> {
29 if let Some(parent) = Path::new(output_path).parent()
30 && !parent.exists()
31 {
32 fs::create_dir_all(parent)?;
33 }
34
35 let mut output = fs::File::create(output_path)?;
36
37 let input_dir_name = if input_dir == "." {
38 let current_dir = std::env::current_dir()?;
39 current_dir
40 .file_name()
41 .unwrap()
42 .to_str()
43 .unwrap()
44 .to_string()
45 } else {
46 input_dir.to_string()
47 };
48
49 writeln!(output, "# Directory Structure Report\n")?;
51
52 if !filters.is_empty() {
53 writeln!(
54 output,
55 "This document contains files from the `{}` directory with extensions: {}",
56 input_dir_name,
57 filters.join(", ")
58 )?;
59 } else {
60 writeln!(
61 output,
62 "This document contains all files from the `{}` directory, optimized for LLM consumption.",
63 input_dir_name
64 )?;
65 }
66
67 if !ignores.is_empty() {
68 writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
69 }
70
71 writeln!(
72 output,
73 "Processed at: {}",
74 Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
75 )?;
76 writeln!(output)?;
77
78 writeln!(output, "## File Tree Structure\n")?;
81
82 write_tree_to_file(&mut output, file_tree, 0)?;
83
84 writeln!(output)?;
85
86 #[cfg(feature = "parallel")]
90 {
91 use rayon::prelude::*;
92
93 type ChunkResult = (usize, io::Result<Vec<u8>>);
95 let (sender, receiver): (Sender<ChunkResult>, Receiver<ChunkResult>) =
96 bounded(num_cpus::get() * 2); let writer_handle = {
99 let mut output = output;
100 let total_files = files.len();
101
102 thread::spawn(move || -> io::Result<()> {
103 let mut completed_chunks = std::collections::BTreeMap::new();
104 let mut next_index = 0;
105 let mut errors = Vec::new();
106
107 while next_index < total_files {
109 match receiver.recv() {
110 Ok((index, chunk_result)) => {
111 completed_chunks.insert(index, chunk_result);
112
113 while let Some(chunk_result) = completed_chunks.remove(&next_index) {
115 match chunk_result {
116 Ok(buf) => {
117 if let Err(e) = output.write_all(&buf) {
118 errors.push(format!(
119 "Failed to write output for file index {}: {}",
120 next_index, e
121 ));
122 }
123 }
124 Err(e) => {
125 errors.push(format!(
126 "Failed to process file index {}: {}",
127 next_index, e
128 ));
129 }
130 }
131 next_index += 1;
132 }
133 }
134 Err(_) => break, }
136 }
137
138 if !errors.is_empty() {
139 error!(
140 "Encountered {} errors during parallel processing:",
141 errors.len()
142 );
143 for err in &errors {
144 error!(" {}", err);
145 }
146 return Err(std::io::Error::other(format!(
147 "Failed to process {} files: {}",
148 errors.len(),
149 errors.join("; ")
150 )));
151 }
152
153 Ok(())
154 })
155 };
156
157 files.par_iter().enumerate().for_each(|(index, entry)| {
159 let mut buf = Vec::new();
160 let result = process_file(
161 base_path,
162 entry.path(),
163 &mut buf,
164 line_numbers,
165 encoding_strategy,
166 )
167 .map(|_| buf);
168
169 let _ = sender.send((index, result));
171 });
172
173 drop(sender);
175
176 writer_handle
178 .join()
179 .map_err(|_| std::io::Error::other("Writer thread panicked"))??;
180 }
181
182 #[cfg(not(feature = "parallel"))]
183 {
184 for entry in files {
185 process_file(
186 base_path,
187 entry.path(),
188 &mut output,
189 line_numbers,
190 encoding_strategy,
191 )?;
192 }
193 }
194
195 Ok(())
196}
197
198pub fn process_file(
200 base_path: &Path,
201
202 file_path: &Path,
203
204 output: &mut impl Write,
205 line_numbers: bool,
206 encoding_strategy: Option<&str>,
207) -> io::Result<()> {
208 let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
209 info!("Processing file: {}", relative_path.display());
210
211 let metadata = match fs::metadata(file_path) {
212 Ok(meta) => meta,
213 Err(e) => {
214 error!(
215 "Failed to get metadata for {}: {}",
216 relative_path.display(),
217 e
218 );
219 return Ok(());
220 }
221 };
222
223 let modified_time = metadata
224 .modified()
225 .ok()
226 .map(|time| {
227 let system_time: chrono::DateTime<Utc> = time.into();
228 system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
229 })
230 .unwrap_or_else(|| "Unknown".to_string());
231
232 writeln!(output)?;
233 writeln!(output, "### File: `{}`", relative_path.display())?;
234
235 writeln!(output)?;
236
237 writeln!(output, "- Size: {} bytes", metadata.len())?;
238 writeln!(output, "- Modified: {}", modified_time)?;
239 writeln!(output)?;
240
241 let extension = file_path
243 .extension()
244 .and_then(|s| s.to_str())
245 .unwrap_or("text");
246 let language = match extension {
247 "rs" => "rust",
248 "js" => "javascript",
249 "ts" => "typescript",
250 "jsx" => "jsx",
251 "tsx" => "tsx",
252 "json" => "json",
253 "toml" => "toml",
254 "md" => "markdown",
255 "yaml" | "yml" => "yaml",
256 "html" => "html",
257 "css" => "css",
258 "py" => "python",
259 "java" => "java",
260 "cpp" => "cpp",
261 "c" => "c",
262 "h" => "c",
263 "hpp" => "cpp",
264 "sql" => "sql",
265 "sh" => "bash",
266 "xml" => "xml",
267 "lock" => "toml",
268 _ => extension,
269 };
270
271 match fs::File::open(file_path) {
273 Ok(mut file) => {
274 let mut sniff = [0u8; 8192];
275 let n = match file.read(&mut sniff) {
276 Ok(n) => n,
277 Err(e) => {
278 warn!(
279 "Could not read file {}: {}. Skipping content.",
280 relative_path.display(),
281 e
282 );
283
284 writeln!(output, "```text")?;
285
286 writeln!(
287 output,
288 "<Could not read file content (e.g., binary file or permission error)>"
289 )?;
290
291 writeln!(output, "```")?;
292
293 return Ok(());
294 }
295 };
296 let slice = &sniff[..n];
297
298 let check_len = if n == sniff.len() {
302 let mut end = n;
304 while end > 0 && end > n.saturating_sub(4) && sniff[end - 1] & 0xC0 == 0x80 {
305 end -= 1; }
307 if end > 0 && end < n {
309 let leading = sniff[end - 1];
310 let expected_len = if leading & 0xE0 == 0xC0 { 2 }
311 else if leading & 0xF0 == 0xE0 { 3 }
312 else if leading & 0xF8 == 0xF0 { 4 }
313 else { 1 };
314 if end - 1 + expected_len > n {
315 end - 1 } else {
317 n
318 }
319 } else {
320 n
321 }
322 } else {
323 n };
325
326 let is_utf8 = std::str::from_utf8(&sniff[..check_len]).is_ok();
328
329 if is_utf8 && !slice.contains(&0) {
330 } else {
332 let (encoding, _consumed) =
335 encoding_rs::Encoding::for_bom(slice).unwrap_or((encoding_rs::UTF_8, 0));
336
337 let detected_encoding = if encoding == UTF_8 {
339 detect_text_encoding(slice)
341 } else {
342 Some(encoding)
343 };
344
345 match detected_encoding {
346 Some(enc) if enc != UTF_8 => {
347 let strategy = encoding_strategy.unwrap_or("detect");
348 match strategy {
349 "strict" | "skip" => {
350 warn!(
352 "Skipping non-UTF-8 file {} (encoding: {}, strategy: {})",
353 relative_path.display(),
354 enc.name(),
355 strategy
356 );
357 }
358 _ => {
359 match transcode_file_content(file_path, enc) {
361 Ok(transcoded_content) => {
362 info!(
363 "Successfully transcoded {} from {} to UTF-8",
364 relative_path.display(),
365 enc.name()
366 );
367 write_text_content(
368 output,
369 &transcoded_content,
370 language,
371 line_numbers,
372 )?;
373 return Ok(());
374 }
375 Err(e) => {
376 warn!(
377 "Failed to transcode {} from {}: {}. Treating as binary.",
378 relative_path.display(),
379 enc.name(),
380 e
381 );
382 }
383 }
384 }
385 }
386 }
387 _ => {
388 if slice.contains(&0) {
390 warn!(
391 "Detected binary file {} (contains null bytes). Skipping content.",
392 relative_path.display()
393 );
394 } else {
395 warn!(
396 "Could not determine encoding for {}. Treating as binary.",
397 relative_path.display()
398 );
399 }
400 }
401 }
402
403 writeln!(output, "```text")?;
405 writeln!(
406 output,
407 "<Binary file or unsupported encoding: {} bytes>",
408 metadata.len()
409 )?;
410 writeln!(output, "```")?;
411 return Ok(());
412 }
413
414 if let Err(e) = file.seek(SeekFrom::Start(0)) {
416 warn!(
417 "Could not reset file cursor for {}: {}. Skipping content.",
418 relative_path.display(),
419 e
420 );
421 writeln!(output, "```text")?;
422 writeln!(
423 output,
424 "<Could not read file content (e.g., binary file or permission error)>"
425 )?;
426 writeln!(output, "```")?;
427 return Ok(());
428 }
429
430 let content = match std::fs::read_to_string(file_path) {
432 Ok(content) => content,
433 Err(e) => {
434 warn!(
435 "Error reading file {}: {}. Output may be truncated.",
436 relative_path.display(),
437 e
438 );
439 writeln!(output, "```text")?;
440 writeln!(output, "<Error reading file content>")?;
441 writeln!(output, "```")?;
442 return Ok(());
443 }
444 };
445
446 write_text_content(output, &content, language, line_numbers)?;
447 }
448 Err(e) => {
449 warn!(
450 "Could not open file {}: {}. Skipping content.",
451 relative_path.display(),
452 e
453 );
454 writeln!(output, "```text")?;
455 writeln!(
456 output,
457 "<Could not read file content (e.g., binary file or permission error)>"
458 )?;
459 writeln!(output, "```")?;
460 }
461 }
462
463 Ok(())
464}
465
466fn detect_text_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
468 let encodings = [
470 encoding_rs::WINDOWS_1252,
471 encoding_rs::UTF_16LE,
472 encoding_rs::UTF_16BE,
473 encoding_rs::SHIFT_JIS,
474 ];
475
476 for encoding in &encodings {
477 let (decoded, _, had_errors) = encoding.decode(bytes);
478 if !had_errors && is_likely_text(&decoded) {
479 return Some(encoding);
480 }
481 }
482
483 None
484}
485
/// Heuristically decides whether decoded `content` looks like human-readable text.
///
/// Control characters other than newline, carriage return and tab count as
/// suspicious. The content is rejected when suspicious characters make up more
/// than one in twenty of the characters seen. The ratio is checked on the fly
/// once more than 100 characters have been scanned, so a noisy beginning
/// rejects the input without reading the rest. Empty content is accepted.
fn is_likely_text(content: &str) -> bool {
    let mut suspicious = 0;
    let mut seen = 0;

    for c in content.chars() {
        seen += 1;

        let harmless = matches!(c, '\n' | '\r' | '\t') || !c.is_control();
        if !harmless {
            suspicious += 1;
        }

        // Bail out early once enough characters have been scanned to trust the ratio.
        if seen > 100 && suspicious * 20 > seen {
            return false;
        }
    }

    // For empty input both counters are zero, so this evaluates to `true`.
    suspicious * 20 <= seen
}
510
511fn transcode_file_content(file_path: &Path, encoding: &'static Encoding) -> io::Result<String> {
513 let bytes = std::fs::read(file_path)?;
514 let (decoded, _, had_errors) = encoding.decode(&bytes);
515
516 if had_errors {
517 return Err(io::Error::new(
518 io::ErrorKind::InvalidData,
519 format!("Failed to decode file with encoding {}", encoding.name()),
520 ));
521 }
522
523 Ok(decoded.into_owned())
524}
525
/// Writes `content` to `output` as a fenced Markdown code block tagged with `language`.
///
/// With `line_numbers` set, every line is prefixed by its 1-based number,
/// right-aligned to four columns and followed by ` | `. Otherwise the content
/// is written verbatim, with a newline appended when it does not already end
/// in one so that the closing fence starts on its own line.
///
/// # Errors
///
/// Returns any error raised while writing to `output`.
fn write_text_content(
    output: &mut impl Write,
    content: &str,
    language: &str,
    line_numbers: bool,
) -> io::Result<()> {
    writeln!(output, "```{}", language)?;

    if !line_numbers {
        output.write_all(content.as_bytes())?;
        if !content.ends_with('\n') {
            writeln!(output)?;
        }
    } else {
        (1usize..)
            .zip(content.lines())
            .try_for_each(|(number, line)| writeln!(output, "{:>4} | {}", number, line))?;
    }

    writeln!(output, "```")
}
549
#[cfg(test)]
mod tests {
    //! Unit tests for Markdown generation: per-file rendering, encoding
    //! handling, and the top-level report writer.

    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// A `.rs` file is rendered inside a fenced block tagged `rust`.
    #[test]
    fn test_code_block_formatting() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("test.rs");
        let output_path = base_path.join("output.md");

        fs::write(
            &file_path,
            "fn main() {\n println!(\"Hello, world!\");\n}",
        )
        .unwrap();

        let mut output = fs::File::create(&output_path).unwrap();

        process_file(base_path, &file_path, &mut output, false, None).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("```rust"));
        // An opening and a closing fence must both be present.
        assert!(content.matches("```").count() >= 2);
    }

    /// A `.md` file is rendered inside a fenced block tagged `markdown`.
    #[test]
    fn test_markdown_file_formatting() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("README.md");
        let output_path = base_path.join("output.md");

        fs::write(&file_path, "# Test\n\nThis is a test markdown file.").unwrap();

        let mut output = fs::File::create(&output_path).unwrap();

        process_file(base_path, &file_path, &mut output, false, None).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        println!("Generated content:\n{}", content);

        assert!(
            content.contains("```markdown"),
            "Content should contain '```markdown' but was: {}",
            content
        );
        let code_block_markers = content.matches("```").count();

        assert!(
            code_block_markers >= 2,
            "Expected at least 2 code block markers, found {}",
            code_block_markers
        );
    }

    /// With line numbers enabled, every source line gets exactly one numbered
    /// output line.
    #[test]
    fn test_line_numbered_code_blocks() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("lib.rs");
        let output_path = base_path.join("out.md");

        fs::write(
            &file_path,
            "fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\nfn main() {\n println!(\"{}\", add(1, 2));\n}\n",
        )
        .unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, true, None).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("```rust"));
        assert!(content.contains(" 1 | "));
        assert!(content.contains(" 2 | "));

        // Count output lines that look like `<digits> | ...`.
        let numbered_lines = content
            .lines()
            .filter(|l| {
                l.trim_start()
                    .chars()
                    .next()
                    .map(|c| c.is_ascii_digit())
                    .unwrap_or(false)
                    && l.contains(" | ")
            })
            .count();
        let original_line_count = fs::read_to_string(&file_path).unwrap().lines().count();
        assert_eq!(numbered_lines, original_line_count);

        assert!(content.contains("```"));
    }

    /// Binary data (here: bytes starting with a PNG signature and containing
    /// NULs) is replaced by a placeholder block.
    #[test]
    fn test_binary_file_handling() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join("image.bin");
        let output_path = base_path.join("out.md");

        let bytes = vec![
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature
            0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, //
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, //
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
        ];
        fs::write(&file_path, bytes).unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, false, None).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("```text"));
        assert!(content.contains("<Binary file or unsupported encoding:"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    /// With the `detect` strategy a Windows-1252 file is transcoded and its
    /// text appears in the output.
    #[test]
    fn test_encoding_detection_and_transcoding() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("out.md");

        // "Hello " + 0x93 + "World" + 0x94 + "\n"; 0x93/0x94 are not valid UTF-8 here.
        let windows1252_content = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94, 0x0A,
        ];
        let file_path = base_path.join("windows1252.txt");
        fs::write(&file_path, windows1252_content).unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, false, Some("detect")).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("Hello"));
        assert!(content.contains("World"));
        // `txt` has no entry in the language table, so the extension is used as-is.
        assert!(content.contains("```txt"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    /// With the `strict` strategy a non-UTF-8 file yields the placeholder.
    #[test]
    fn test_encoding_strategy_strict() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("out.md");

        // UTF-16LE byte-order mark followed by "A".
        let non_utf8_content = [0xFF, 0xFE, 0x41, 0x00];
        let file_path = base_path.join("utf16.txt");
        fs::write(&file_path, non_utf8_content).unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, false, Some("strict")).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));

        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    /// With the `skip` strategy a non-UTF-8 file yields the placeholder.
    #[test]
    fn test_encoding_strategy_skip() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("out.md");

        // UTF-16LE byte-order mark followed by "Hi".
        let utf16_content = [0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00];
        let file_path = base_path.join("utf16.txt");
        fs::write(&file_path, utf16_content).unwrap();

        let mut output = fs::File::create(&output_path).unwrap();
        process_file(base_path, &file_path, &mut output, false, Some("skip")).unwrap();

        let content = fs::read_to_string(&output_path).unwrap();

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));
    }

    /// `generate_markdown` accepts `"."` as the input directory.
    #[test]
    fn test_generate_markdown_with_current_directory() {
        /// Restores the saved working directory when dropped, so a panic in
        /// the code under test cannot leave the process inside the temp dir.
        struct CwdGuard(std::path::PathBuf);

        impl Drop for CwdGuard {
            fn drop(&mut self) {
                let _ = std::env::set_current_dir(&self.0);
            }
        }

        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("test.md");

        fs::write(base_path.join("readme.txt"), "Hello world").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let cwd_guard = CwdGuard(std::env::current_dir().unwrap());
        std::env::set_current_dir(base_path).unwrap();

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            ".",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
        );

        // Leave the temp dir before asserting (and before it is deleted).
        drop(cwd_guard);

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
    }

    /// Missing parent directories of the output path are created.
    #[test]
    fn test_generate_markdown_creates_output_directory() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nested_output = base_path.join("nested").join("deep").join("output.md");

        fs::write(base_path.join("test.txt"), "content").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &nested_output.to_string_lossy(),
            "test_dir",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
        );

        assert!(result.is_ok());
        assert!(nested_output.exists());
        assert!(nested_output.parent().unwrap().exists());
    }

    /// Report generation succeeds when filters, ignores, line numbers and an
    /// encoding strategy are all supplied.
    #[test]
    fn test_generate_markdown_with_filters_and_ignores() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("filtered.md");

        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
        fs::write(base_path.join("config.toml"), "[package]").unwrap();
        fs::write(base_path.join("readme.md"), "# README").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            "project",
            &["rs".to_string(), "toml".to_string()],
            &["readme.md".to_string()],
            &file_tree,
            &files,
            base_path,
            true,
            Some("strict"),
        );

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
        assert!(content.contains("main.rs") || content.contains("config.toml"));
    }

    /// Numbered output carries a `N | ` prefix on each line.
    #[test]
    fn test_write_text_content_with_line_numbers() {
        let mut output = Vec::new();
        let content = "line one\nline two\nline three";

        write_text_content(&mut output, content, "rust", true).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```rust"));
        assert!(result.contains(" 1 | line one"));
        assert!(result.contains(" 2 | line two"));
        assert!(result.contains(" 3 | line three"));
        assert!(result.contains("```"));
    }

    /// Un-numbered output reproduces the content without any ` | ` prefix.
    #[test]
    fn test_write_text_content_without_line_numbers() {
        let mut output = Vec::new();
        let content = "function test() {\n return true;\n}";

        write_text_content(&mut output, content, "javascript", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```javascript"));
        assert!(result.contains("function test() {"));
        assert!(result.contains(" return true;"));
        assert!(result.contains("```"));
        assert!(!result.contains(" | "));
    }

    /// Content lacking a final newline still ends with a closing fence on its
    /// own line.
    #[test]
    fn test_write_text_content_without_trailing_newline() {
        let mut output = Vec::new();
        let content = "no newline at end";
        write_text_content(&mut output, content, "text", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```text"));
        assert!(result.contains("no newline at end"));
        assert!(result.ends_with("```\n"));
    }

    /// Covers ordinary text, allowed whitespace controls, control-heavy input
    /// and the empty string.
    #[test]
    fn test_is_likely_text() {
        assert!(is_likely_text("Hello world\nThis is normal text"));

        assert!(is_likely_text(
            "Line 1\nLine 2\tTabbed\r\nWindows line ending"
        ));

        // Every fifth character is a control character (20%, above the limit).
        let mut bad_text = String::new();
        for i in 0..200 {
            if i % 5 == 0 {
                bad_text.push('\x01');
            } else {
                bad_text.push('a');
            }
        }
        assert!(!is_likely_text(&bad_text));

        assert!(is_likely_text(""));
    }

    /// Checks the encoding guess for plain ASCII and for Windows-1252 bytes.
    #[test]
    fn test_detect_text_encoding() {
        // Plain ASCII decodes cleanly under the first candidate, so that
        // candidate is what the detector reports.
        let utf8_bytes = "Hello world".as_bytes();
        let result = detect_text_encoding(utf8_bytes);
        assert_eq!(result, Some(encoding_rs::WINDOWS_1252));

        let windows1252_bytes = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x94,
        ];
        let detected = detect_text_encoding(&windows1252_bytes);
        assert!(detected.is_some());
    }

    /// A Windows-1252 file is decoded into UTF-8 text.
    #[test]
    fn test_transcode_file_content() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("windows1252.txt");

        let windows1252_content = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94,
        ];
        fs::write(&file_path, windows1252_content).unwrap();

        let result = transcode_file_content(&file_path, encoding_rs::WINDOWS_1252);
        assert!(result.is_ok());

        let transcoded = result.unwrap();
        assert!(transcoded.contains("Hello"));
        assert!(transcoded.contains("World"));
    }

    /// A file whose metadata cannot be read is skipped without error and
    /// without producing output.
    #[test]
    fn test_process_file_with_metadata_error() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nonexistent_file = base_path.join("nonexistent.txt");
        let output_path = base_path.join("output.md");

        let mut output = fs::File::create(&output_path).unwrap();

        let result = process_file(base_path, &nonexistent_file, &mut output, false, None);
        assert!(result.is_ok());

        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.is_empty() || content.trim().is_empty());
    }

    /// Each known extension maps to its fence language; an unknown extension
    /// is used verbatim.
    #[test]
    fn test_process_file_with_different_extensions() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("output.md");

        // (file name, file content, expected fence language)
        let test_files = [
            ("script.py", "print('hello')", "python"),
            ("data.json", r#"{"key": "value"}"#, "json"),
            ("config.yaml", "key: value", "yaml"),
            ("style.css", "body { margin: 0; }", "css"),
            ("page.html", "<html><body>Test</body></html>", "html"),
            ("query.sql", "SELECT * FROM users;", "sql"),
            ("build.sh", "#!/bin/bash\necho 'building'", "bash"),
            ("unknown.xyz", "unknown content", "xyz"),
        ];

        for (filename, content, expected_lang) in test_files.iter() {
            let file_path = base_path.join(filename);
            fs::write(&file_path, content).unwrap();

            // Recreating the output file truncates it for each case.
            let mut output = fs::File::create(&output_path).unwrap();
            process_file(base_path, &file_path, &mut output, false, None).unwrap();

            let result = fs::read_to_string(&output_path).unwrap();
            assert!(result.contains(&format!("```{}", expected_lang)));
            assert!(result.contains(content));
            assert!(result.contains(filename));
        }
    }
}