1use chrono::Utc;
2use ignore::DirEntry;
3use log::{error, info, warn};
4use std::fs;
5use std::io::{self, Read, Seek, SeekFrom, Write};
6use std::path::Path;
7
8use crate::tree::{FileTree, write_tree_to_file};
9use encoding_rs::{Encoding, UTF_8};
10
11#[cfg(feature = "parallel")]
12use crossbeam_channel::{Receiver, Sender, bounded};
13#[cfg(feature = "parallel")]
14use std::thread;
15
16#[allow(clippy::too_many_arguments, unused_variables)]
18pub fn generate_markdown(
19 output_path: &str,
20 input_dir: &str,
21 filters: &[String],
22 ignores: &[String],
23 file_tree: &FileTree,
24 files: &[DirEntry],
25 base_path: &Path,
26 line_numbers: bool,
27 encoding_strategy: Option<&str>,
28 max_tokens: Option<usize>,
29) -> io::Result<()> {
30 if let Some(parent) = Path::new(output_path).parent()
31 && !parent.exists()
32 {
33 fs::create_dir_all(parent)?;
34 }
35
36 let mut output = fs::File::create(output_path)?;
37
38 let input_dir_name = if input_dir == "." {
39 let current_dir = std::env::current_dir()?;
40 current_dir
41 .file_name()
42 .unwrap()
43 .to_str()
44 .unwrap()
45 .to_string()
46 } else {
47 input_dir.to_string()
48 };
49
50 writeln!(output, "# Directory Structure Report\n")?;
52
53 if !filters.is_empty() {
54 writeln!(
55 output,
56 "This document contains files from the `{}` directory with extensions: {}",
57 input_dir_name,
58 filters.join(", ")
59 )?;
60 } else {
61 writeln!(
62 output,
63 "This document contains all files from the `{}` directory, optimized for LLM consumption.",
64 input_dir_name
65 )?;
66 }
67
68 if !ignores.is_empty() {
69 writeln!(output, "Custom ignored patterns: {}", ignores.join(", "))?;
70 }
71
72 let mut content_hasher = xxhash_rust::xxh3::Xxh3::new();
76 for entry in files {
77 let rel_path = entry.path().strip_prefix(base_path).unwrap_or(entry.path());
81 let normalized = rel_path.to_string_lossy().replace('\\', "/");
82 content_hasher.update(normalized.as_bytes());
83 content_hasher.update(b"\0");
85 if let Ok(bytes) = std::fs::read(entry.path()) {
87 content_hasher.update(&bytes);
88 }
89 content_hasher.update(b"\0");
90 }
91 writeln!(output, "Content hash: {:016x}", content_hasher.digest())?;
92 writeln!(output)?;
93
94 writeln!(output, "## File Tree Structure\n")?;
97
98 write_tree_to_file(&mut output, file_tree, 0)?;
99
100 writeln!(output)?;
101
102 #[cfg(feature = "parallel")]
106 {
107 use rayon::prelude::*;
108
109 type ChunkResult = (usize, io::Result<Vec<u8>>);
111 let (sender, receiver): (Sender<ChunkResult>, Receiver<ChunkResult>) =
112 bounded(num_cpus::get() * 2); let writer_handle = {
115 let mut output = output;
116 let total_files = files.len();
117 let budget = max_tokens;
118
119 thread::spawn(move || -> io::Result<()> {
120 let mut completed_chunks = std::collections::BTreeMap::new();
121 let mut next_index = 0;
122 let mut errors = Vec::new();
123 let mut tokens_used: usize = 0;
124 let mut budget_exceeded = false;
125
126 while next_index < total_files {
128 match receiver.recv() {
129 Ok((index, chunk_result)) => {
130 completed_chunks.insert(index, chunk_result);
131
132 while let Some(chunk_result) = completed_chunks.remove(&next_index) {
134 if budget_exceeded {
135 next_index += 1;
137 continue;
138 }
139
140 match chunk_result {
141 Ok(buf) => {
142 let chunk_tokens = buf.len() / 4;
144
145 if let Some(max) = budget
146 && tokens_used + chunk_tokens > max
147 && tokens_used > 0
148 {
149 let remaining = total_files - next_index;
150 let notice = format!(
151 "---\n\n_⚠️ Token budget ({}) reached. {} remaining files omitted._\n\n",
152 max, remaining
153 );
154 if let Err(e) = output.write_all(notice.as_bytes()) {
155 errors.push(format!(
156 "Failed to write truncation notice: {}",
157 e
158 ));
159 }
160 budget_exceeded = true;
161 next_index += 1;
162 continue;
163 }
164
165 tokens_used += chunk_tokens;
166 if let Err(e) = output.write_all(&buf) {
167 errors.push(format!(
168 "Failed to write output for file index {}: {}",
169 next_index, e
170 ));
171 }
172 }
173 Err(e) => {
174 errors.push(format!(
175 "Failed to process file index {}: {}",
176 next_index, e
177 ));
178 }
179 }
180 next_index += 1;
181 }
182 }
183 Err(_) => break, }
185 }
186
187 if !errors.is_empty() {
188 error!(
189 "Encountered {} errors during parallel processing:",
190 errors.len()
191 );
192 for err in &errors {
193 error!(" {}", err);
194 }
195 return Err(std::io::Error::other(format!(
196 "Failed to process {} files: {}",
197 errors.len(),
198 errors.join("; ")
199 )));
200 }
201
202 Ok(())
203 })
204 };
205
206 files.par_iter().enumerate().for_each(|(index, entry)| {
208 let mut buf = Vec::new();
209 let result = process_file(
210 base_path,
211 entry.path(),
212 &mut buf,
213 line_numbers,
214 encoding_strategy,
215 )
216 .map(|_| buf);
217
218 let _ = sender.send((index, result));
220 });
221
222 drop(sender);
224
225 writer_handle
227 .join()
228 .map_err(|_| std::io::Error::other("Writer thread panicked"))??;
229 }
230
231 #[cfg(not(feature = "parallel"))]
232 {
233 let mut tokens_used: usize = 0;
234
235 for (idx, entry) in files.iter().enumerate() {
236 let file_size = std::fs::metadata(entry.path())
238 .map(|m| m.len())
239 .unwrap_or(0);
240 let estimated_file_tokens = (file_size as usize) / 4;
241
242 if let Some(budget) = max_tokens {
243 if tokens_used + estimated_file_tokens > budget && tokens_used > 0 {
244 let remaining = files.len() - idx;
245 writeln!(output, "---\n")?;
246 writeln!(
247 output,
248 "_⚠️ Token budget ({}) reached. {} remaining files omitted._\n",
249 budget, remaining
250 )?;
251 break;
252 }
253 }
254
255 tokens_used += estimated_file_tokens;
256 process_file(
257 base_path,
258 entry.path(),
259 &mut output,
260 line_numbers,
261 encoding_strategy,
262 )?;
263 }
264 }
265
266 Ok(())
267}
268
269pub fn process_file(
271 base_path: &Path,
272
273 file_path: &Path,
274
275 output: &mut impl Write,
276 line_numbers: bool,
277 encoding_strategy: Option<&str>,
278) -> io::Result<()> {
279 let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
280 info!("Processing file: {}", relative_path.display());
281
282 let metadata = match fs::metadata(file_path) {
283 Ok(meta) => meta,
284 Err(e) => {
285 error!(
286 "Failed to get metadata for {}: {}",
287 relative_path.display(),
288 e
289 );
290 return Ok(());
291 }
292 };
293
294 let modified_time = metadata
295 .modified()
296 .ok()
297 .map(|time| {
298 let system_time: chrono::DateTime<Utc> = time.into();
299 system_time.format("%Y-%m-%d %H:%M:%S UTC").to_string()
300 })
301 .unwrap_or_else(|| "Unknown".to_string());
302
303 writeln!(output)?;
304 writeln!(output, "### File: `{}`", relative_path.display())?;
305
306 writeln!(output)?;
307
308 writeln!(output, "- Size: {} bytes", metadata.len())?;
309 writeln!(output, "- Modified: {}", modified_time)?;
310 writeln!(output)?;
311
312 let extension = file_path
314 .extension()
315 .and_then(|s| s.to_str())
316 .unwrap_or("text");
317 let language = match extension {
318 "rs" => "rust",
319 "js" => "javascript",
320 "ts" => "typescript",
321 "jsx" => "jsx",
322 "tsx" => "tsx",
323 "json" => "json",
324 "toml" => "toml",
325 "md" => "markdown",
326 "yaml" | "yml" => "yaml",
327 "html" => "html",
328 "css" => "css",
329 "py" => "python",
330 "java" => "java",
331 "cpp" => "cpp",
332 "c" => "c",
333 "h" => "c",
334 "hpp" => "cpp",
335 "sql" => "sql",
336 "sh" => "bash",
337 "xml" => "xml",
338 "lock" => "toml",
339 _ => extension,
340 };
341
342 match fs::File::open(file_path) {
344 Ok(mut file) => {
345 let mut sniff = [0u8; 8192];
346 let n = match file.read(&mut sniff) {
347 Ok(n) => n,
348 Err(e) => {
349 warn!(
350 "Could not read file {}: {}. Skipping content.",
351 relative_path.display(),
352 e
353 );
354
355 writeln!(output, "```text")?;
356
357 writeln!(
358 output,
359 "<Could not read file content (e.g., binary file or permission error)>"
360 )?;
361
362 writeln!(output, "```")?;
363
364 return Ok(());
365 }
366 };
367 let slice = &sniff[..n];
368
369 let check_len = if n == sniff.len() {
373 let mut end = n;
375 while end > 0 && end > n.saturating_sub(4) && sniff[end - 1] & 0xC0 == 0x80 {
376 end -= 1; }
378 if end > 0 && end < n {
380 let leading = sniff[end - 1];
381 let expected_len = if leading & 0xE0 == 0xC0 {
382 2
383 } else if leading & 0xF0 == 0xE0 {
384 3
385 } else if leading & 0xF8 == 0xF0 {
386 4
387 } else {
388 1
389 };
390 if end - 1 + expected_len > n {
391 end - 1 } else {
393 n
394 }
395 } else {
396 n
397 }
398 } else {
399 n };
401
402 let is_utf8 = std::str::from_utf8(&sniff[..check_len]).is_ok();
404
405 if is_utf8 && !slice.contains(&0) {
406 } else {
408 let (encoding, _consumed) =
411 encoding_rs::Encoding::for_bom(slice).unwrap_or((encoding_rs::UTF_8, 0));
412
413 let detected_encoding = if encoding == UTF_8 {
415 detect_text_encoding(slice)
417 } else {
418 Some(encoding)
419 };
420
421 match detected_encoding {
422 Some(enc) if enc != UTF_8 => {
423 let strategy = encoding_strategy.unwrap_or("detect");
424 match strategy {
425 "strict" | "skip" => {
426 warn!(
428 "Skipping non-UTF-8 file {} (encoding: {}, strategy: {})",
429 relative_path.display(),
430 enc.name(),
431 strategy
432 );
433 }
434 _ => {
435 match transcode_file_content(file_path, enc) {
437 Ok(transcoded_content) => {
438 info!(
439 "Successfully transcoded {} from {} to UTF-8",
440 relative_path.display(),
441 enc.name()
442 );
443 write_text_content(
444 output,
445 &transcoded_content,
446 language,
447 line_numbers,
448 )?;
449 return Ok(());
450 }
451 Err(e) => {
452 warn!(
453 "Failed to transcode {} from {}: {}. Treating as binary.",
454 relative_path.display(),
455 enc.name(),
456 e
457 );
458 }
459 }
460 }
461 }
462 }
463 _ => {
464 if slice.contains(&0) {
466 warn!(
467 "Detected binary file {} (contains null bytes). Skipping content.",
468 relative_path.display()
469 );
470 } else {
471 warn!(
472 "Could not determine encoding for {}. Treating as binary.",
473 relative_path.display()
474 );
475 }
476 }
477 }
478
479 writeln!(output, "```text")?;
481 writeln!(
482 output,
483 "<Binary file or unsupported encoding: {} bytes>",
484 metadata.len()
485 )?;
486 writeln!(output, "```")?;
487 return Ok(());
488 }
489
490 if let Err(e) = file.seek(SeekFrom::Start(0)) {
492 warn!(
493 "Could not reset file cursor for {}: {}. Skipping content.",
494 relative_path.display(),
495 e
496 );
497 writeln!(output, "```text")?;
498 writeln!(
499 output,
500 "<Could not read file content (e.g., binary file or permission error)>"
501 )?;
502 writeln!(output, "```")?;
503 return Ok(());
504 }
505
506 let content = match std::fs::read_to_string(file_path) {
508 Ok(content) => content,
509 Err(e) => {
510 warn!(
511 "Error reading file {}: {}. Output may be truncated.",
512 relative_path.display(),
513 e
514 );
515 writeln!(output, "```text")?;
516 writeln!(output, "<Error reading file content>")?;
517 writeln!(output, "```")?;
518 return Ok(());
519 }
520 };
521
522 write_text_content(output, &content, language, line_numbers)?;
523 }
524 Err(e) => {
525 warn!(
526 "Could not open file {}: {}. Skipping content.",
527 relative_path.display(),
528 e
529 );
530 writeln!(output, "```text")?;
531 writeln!(
532 output,
533 "<Could not read file content (e.g., binary file or permission error)>"
534 )?;
535 writeln!(output, "```")?;
536 }
537 }
538
539 Ok(())
540}
541
542fn detect_text_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
544 let encodings = [
546 encoding_rs::WINDOWS_1252,
547 encoding_rs::UTF_16LE,
548 encoding_rs::UTF_16BE,
549 encoding_rs::SHIFT_JIS,
550 ];
551
552 for encoding in &encodings {
553 let (decoded, _, had_errors) = encoding.decode(bytes);
554 if !had_errors && is_likely_text(&decoded) {
555 return Some(encoding);
556 }
557 }
558
559 None
560}
561
/// Heuristically decides whether `content` looks like human-readable text.
///
/// Control characters other than `\n`, `\r` and `\t` are counted; the content
/// is rejected when they make up more than one in twenty characters. The
/// ratio is also checked after every character once more than 100 characters
/// have been seen, so a noisy prefix rejects early. Empty input is accepted.
fn is_likely_text(content: &str) -> bool {
    // Fold over (characters seen, suspicious control characters); `None`
    // short-circuits as soon as the running ratio is exceeded.
    let tally = content
        .chars()
        .try_fold((0i32, 0i32), |(seen, suspicious), ch| {
            let seen = seen + 1;
            let suspicious = if ch.is_control() && !matches!(ch, '\n' | '\r' | '\t') {
                suspicious + 1
            } else {
                suspicious
            };

            if seen > 100 && suspicious * 20 > seen {
                None
            } else {
                Some((seen, suspicious))
            }
        });

    match tally {
        None => false,
        Some((0, _)) => true,
        Some((seen, suspicious)) => suspicious * 20 <= seen,
    }
}
586
587fn transcode_file_content(file_path: &Path, encoding: &'static Encoding) -> io::Result<String> {
589 let bytes = std::fs::read(file_path)?;
590 let (decoded, _, had_errors) = encoding.decode(&bytes);
591
592 if had_errors {
593 return Err(io::Error::new(
594 io::ErrorKind::InvalidData,
595 format!("Failed to decode file with encoding {}", encoding.name()),
596 ));
597 }
598
599 Ok(decoded.into_owned())
600}
601
/// Writes `content` to `output` as a fenced Markdown code block tagged with
/// `language`.
///
/// The fence is three backticks unless `content` itself contains a run of
/// three or more backticks, in which case the fence is made one backtick
/// longer than the longest run so the content cannot close the block early
/// (for example when the rendered file is itself Markdown with code blocks).
///
/// With `line_numbers` set, each line is prefixed with its 1-based number,
/// right-aligned to four columns, followed by ` | `. Otherwise the content is
/// written verbatim and a trailing newline is added if it is missing.
///
/// # Errors
/// Returns any error produced while writing to `output`.
fn write_text_content(
    output: &mut impl Write,
    content: &str,
    language: &str,
    line_numbers: bool,
) -> io::Result<()> {
    // Splitting on every non-backtick character leaves exactly the runs of
    // consecutive backticks (plus empty strings), so the longest piece is the
    // longest run.
    let longest_backtick_run = content
        .split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0);
    let fence = "`".repeat(longest_backtick_run.max(2) + 1);

    writeln!(output, "{}{}", fence, language)?;

    if line_numbers {
        for (i, line) in content.lines().enumerate() {
            writeln!(output, "{:>4} | {}", i + 1, line)?;
        }
    } else {
        output.write_all(content.as_bytes())?;
        // Make sure the closing fence starts on its own line.
        if !content.ends_with('\n') {
            writeln!(output)?;
        }
    }

    writeln!(output, "{}", fence)?;
    Ok(())
}
625
/// Unit tests for report rendering, encoding detection and the text helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// "Hello “World”" encoded as Windows-1252 (0x93 / 0x94 are curly quotes).
    const WINDOWS_1252_HELLO: [u8; 13] = [
        0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x93, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x94,
    ];

    /// Writes `bytes` to `name` inside a fresh temp dir, runs `process_file`
    /// on it and returns the rendered Markdown.
    fn render_file(
        name: &str,
        bytes: &[u8],
        line_numbers: bool,
        encoding_strategy: Option<&str>,
    ) -> String {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let file_path = base_path.join(name);
        fs::write(&file_path, bytes).unwrap();

        let mut rendered = Vec::new();
        process_file(
            base_path,
            &file_path,
            &mut rendered,
            line_numbers,
            encoding_strategy,
        )
        .unwrap();
        String::from_utf8(rendered).unwrap()
    }

    /// Asserts that `content` has at least an opening and a closing fence.
    fn assert_fenced(content: &str) {
        let fence_count = content.matches("```").count();
        assert!(
            fence_count >= 2,
            "expected at least opening and closing fences, got {}",
            fence_count
        );
    }

    /// Restores the process working directory when dropped, so a panicking
    /// test cannot leave other tests running in a deleted temp dir.
    struct CwdGuard(std::path::PathBuf);

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            let _ = std::env::set_current_dir(&self.0);
        }
    }

    #[test]
    fn test_code_block_formatting() {
        let content = render_file(
            "test.rs",
            b"fn main() {\n println!(\"Hello, world!\");\n}",
            false,
            None,
        );

        assert!(content.contains("```rust"));
        assert_fenced(&content);
    }

    #[test]
    fn test_markdown_file_formatting() {
        let content = render_file(
            "README.md",
            b"# Test\n\nThis is a test markdown file.",
            false,
            None,
        );

        println!("Generated content:\n{}", content);

        assert!(
            content.contains("```markdown"),
            "Content should contain '```markdown' but was: {}",
            content
        );
        let code_block_markers = content.matches("```").count();
        assert!(
            code_block_markers >= 2,
            "Expected at least 2 code block markers, found {}",
            code_block_markers
        );
    }

    #[test]
    fn test_line_numbered_code_blocks() {
        let source = "fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\nfn main() {\n println!(\"{}\", add(1, 2));\n}\n";
        let content = render_file("lib.rs", source.as_bytes(), true, None);

        assert!(content.contains("```rust"));
        assert!(content.contains(" 1 | "));
        assert!(content.contains(" 2 | "));

        // Every source line must appear exactly once with a number prefix.
        let numbered_lines = content
            .lines()
            .filter(|l| {
                l.trim_start()
                    .chars()
                    .next()
                    .map(|c| c.is_ascii_digit())
                    .unwrap_or(false)
                    && l.contains(" | ")
            })
            .count();
        assert_eq!(numbered_lines, source.lines().count());

        assert_fenced(&content);
    }

    #[test]
    fn test_binary_file_handling() {
        // PNG signature followed by NUL-heavy header bytes.
        let bytes = [
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48,
            0x44, 0x52, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00,
        ];
        let content = render_file("image.bin", &bytes, false, None);

        assert!(content.contains("```text"));
        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert_fenced(&content);
    }

    #[test]
    fn test_encoding_detection_and_transcoding() {
        let mut bytes = WINDOWS_1252_HELLO.to_vec();
        bytes.push(0x0A);
        let content = render_file("windows1252.txt", &bytes, false, Some("detect"));

        assert!(content.contains("Hello"));
        assert!(content.contains("World"));
        assert!(content.contains("```txt"));
        assert_fenced(&content);
    }

    #[test]
    fn test_encoding_strategy_strict() {
        // UTF-16LE BOM followed by "A".
        let content = render_file("utf16.txt", &[0xFF, 0xFE, 0x41, 0x00], false, Some("strict"));

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));
        assert_fenced(&content);
    }

    #[test]
    fn test_encoding_strategy_skip() {
        // UTF-16LE BOM followed by "Hi".
        let content = render_file(
            "utf16.txt",
            &[0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00],
            false,
            Some("skip"),
        );

        assert!(content.contains("<Binary file or unsupported encoding:"));
        assert!(content.contains("```text"));
    }

    #[test]
    fn test_generate_markdown_with_current_directory() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("test.md");

        fs::write(base_path.join("readme.txt"), "Hello world").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        // Passing "." makes the function look up the working directory name.
        let restore_cwd = CwdGuard(std::env::current_dir().unwrap());
        std::env::set_current_dir(base_path).unwrap();

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            ".",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
            None,
        );

        drop(restore_cwd);

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
    }

    #[test]
    fn test_generate_markdown_creates_output_directory() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nested_output = base_path.join("nested").join("deep").join("output.md");

        fs::write(base_path.join("test.txt"), "content").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &nested_output.to_string_lossy(),
            "test_dir",
            &[],
            &[],
            &file_tree,
            &files,
            base_path,
            false,
            None,
            None,
        );

        assert!(result.is_ok());
        assert!(nested_output.exists());
        assert!(nested_output.parent().unwrap().exists());
    }

    #[test]
    fn test_generate_markdown_with_filters_and_ignores() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let output_path = base_path.join("filtered.md");

        fs::write(base_path.join("main.rs"), "fn main() {}").unwrap();
        fs::write(base_path.join("config.toml"), "[package]").unwrap();
        fs::write(base_path.join("readme.md"), "# README").unwrap();

        let files = crate::file_utils::collect_files(base_path, &[], &[], &[]).unwrap();
        let file_tree = crate::tree::build_file_tree(&files, base_path);

        let result = generate_markdown(
            &output_path.to_string_lossy(),
            "project",
            &["rs".to_string(), "toml".to_string()],
            &["readme.md".to_string()],
            &file_tree,
            &files,
            base_path,
            true,
            Some("strict"),
            None,
        );

        assert!(result.is_ok());
        let content = fs::read_to_string(&output_path).unwrap();
        assert!(content.contains("Directory Structure Report"));
        assert!(content.contains("main.rs") || content.contains("config.toml"));
    }

    #[test]
    fn test_write_text_content_with_line_numbers() {
        let mut output = Vec::new();
        let content = "line one\nline two\nline three";

        write_text_content(&mut output, content, "rust", true).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```rust"));
        assert!(result.contains(" 1 | line one"));
        assert!(result.contains(" 2 | line two"));
        assert!(result.contains(" 3 | line three"));
        assert_fenced(&result);
    }

    #[test]
    fn test_write_text_content_without_line_numbers() {
        let mut output = Vec::new();
        let content = "function test() {\n return true;\n}";

        write_text_content(&mut output, content, "javascript", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```javascript"));
        assert!(result.contains("function test() {"));
        assert!(result.contains(" return true;"));
        assert_fenced(&result);
        // No line-number gutter may appear in this mode.
        assert!(!result.contains(" | "));
    }

    #[test]
    fn test_write_text_content_without_trailing_newline() {
        let mut output = Vec::new();
        let content = "no newline at end";
        write_text_content(&mut output, content, "text", false).unwrap();

        let result = String::from_utf8(output).unwrap();
        assert!(result.contains("```text"));
        assert!(result.contains("no newline at end"));
        // The closing fence must sit on its own line.
        assert!(result.ends_with("```\n"));
    }

    #[test]
    fn test_is_likely_text() {
        assert!(is_likely_text("Hello world\nThis is normal text"));

        assert!(is_likely_text(
            "Line 1\nLine 2\tTabbed\r\nWindows line ending"
        ));

        // One control character in five is far above the accepted ratio.
        let bad_text: String = (0..200)
            .map(|i| if i % 5 == 0 { '\x01' } else { 'a' })
            .collect();
        assert!(!is_likely_text(&bad_text));

        assert!(is_likely_text(""));
    }

    #[test]
    fn test_detect_text_encoding() {
        // Windows-1252 is the first candidate tried and plain ASCII decodes
        // cleanly under it, so ASCII input is reported as Windows-1252.
        let ascii = detect_text_encoding("Hello world".as_bytes());
        assert!(ascii == Some(encoding_rs::WINDOWS_1252));

        let detected = detect_text_encoding(&WINDOWS_1252_HELLO);
        assert!(detected == Some(encoding_rs::WINDOWS_1252));
    }

    #[test]
    fn test_transcode_file_content() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("windows1252.txt");
        fs::write(&file_path, WINDOWS_1252_HELLO).unwrap();

        let result = transcode_file_content(&file_path, encoding_rs::WINDOWS_1252);
        assert!(result.is_ok());

        let transcoded = result.unwrap();
        assert!(transcoded.contains("Hello"));
        assert!(transcoded.contains("World"));
    }

    #[test]
    fn test_process_file_with_metadata_error() {
        let dir = tempdir().unwrap();
        let base_path = dir.path();
        let nonexistent_file = base_path.join("nonexistent.txt");

        let mut rendered = Vec::new();
        let result = process_file(base_path, &nonexistent_file, &mut rendered, false, None);
        assert!(result.is_ok());

        // A file whose metadata cannot be read is skipped without output.
        let content = String::from_utf8(rendered).unwrap();
        assert!(content.trim().is_empty());
    }

    #[test]
    fn test_process_file_with_different_extensions() {
        let test_files = [
            ("script.py", "print('hello')", "python"),
            ("data.json", r#"{"key": "value"}"#, "json"),
            ("config.yaml", "key: value", "yaml"),
            ("style.css", "body { margin: 0; }", "css"),
            ("page.html", "<html><body>Test</body></html>", "html"),
            ("query.sql", "SELECT * FROM users;", "sql"),
            ("build.sh", "#!/bin/bash\necho 'building'", "bash"),
            ("unknown.xyz", "unknown content", "xyz"),
        ];

        for (filename, content, expected_lang) in test_files {
            let result = render_file(filename, content.as_bytes(), false, None);

            assert!(result.contains(&format!("```{}", expected_lang)));
            assert!(result.contains(content));
            assert!(result.contains(filename));
        }
    }
}