1use crate::layout::{
7 TextBlock, TextLine, cluster_lines_into_blocks, cluster_words_into_lines,
8 sort_blocks_reading_order, split_lines_at_columns,
9};
10use crate::table::Table;
11use crate::text::Char;
12use crate::words::{Word, WordExtractor, WordOptions};
13
/// Tuning parameters for [`MarkdownRenderer`].
#[derive(Debug, Clone)]
pub struct MarkdownOptions {
    /// Vertical tolerance forwarded to word extraction and to line clustering.
    pub y_tolerance: f64,
    /// Density value forwarded to `cluster_lines_into_blocks`.
    pub y_density: f64,
    /// Density value forwarded to `split_lines_at_columns` and
    /// `sort_blocks_reading_order`.
    pub x_density: f64,
    /// Smallest font-size / median-size ratio at which a block may be a heading.
    pub heading_min_ratio: f64,
    /// When `true`, blocks whose lines all start with a list marker become list items.
    pub detect_lists: bool,
    /// When `true`, paragraphs are rendered with bold/italic Markdown emphasis.
    pub detect_emphasis: bool,
}

impl Default for MarkdownOptions {
    /// Returns the stock configuration: tolerance 3.0, both densities 10.0,
    /// heading ratio 1.2, list and emphasis detection enabled.
    fn default() -> Self {
        let density = 10.0;
        MarkdownOptions {
            detect_emphasis: true,
            detect_lists: true,
            heading_min_ratio: 1.2,
            x_density: density,
            y_density: density,
            y_tolerance: 3.0,
        }
    }
}
44
/// One renderable unit of the Markdown output.
#[derive(Debug, Clone, PartialEq)]
enum ContentElement {
    /// A heading; `level` is the number of `#` characters emitted before `text`.
    Heading { level: u8, text: String },
    /// A paragraph of body text.
    Paragraph(String),
    /// A table that has already been rendered to Markdown text.
    Table(String),
    /// A single list entry.
    ListItem {
        /// The list marker as it was found in the source text.
        prefix: String,
        /// The entry text that followed the marker.
        text: String,
    },
}
62
63pub struct MarkdownRenderer;
65
66impl MarkdownRenderer {
67 pub fn render(chars: &[Char], tables: &[Table], options: &MarkdownOptions) -> String {
76 if chars.is_empty() && tables.is_empty() {
77 return String::new();
78 }
79
80 let words = WordExtractor::extract(
81 chars,
82 &WordOptions {
83 y_tolerance: options.y_tolerance,
84 ..WordOptions::default()
85 },
86 );
87
88 let lines = cluster_words_into_lines(&words, options.y_tolerance);
89 let split = split_lines_at_columns(lines, options.x_density);
90 let mut blocks = cluster_lines_into_blocks(split, options.y_density);
91 sort_blocks_reading_order(&mut blocks, options.x_density);
92
93 let median_size = compute_median_font_size(chars);
94
95 let mut elements = classify_blocks(&blocks, median_size, options);
97
98 for table in tables {
100 let table_md = table_to_gfm(table);
101 let table_top = table.bbox.top;
102 let insert_pos = elements
104 .iter()
105 .enumerate()
106 .rev()
107 .find(|(_, _)| true) .map(|(i, _)| i + 1)
109 .unwrap_or(0);
110 let _ = insert_pos;
112 let _ = table_top;
113 elements.push(ContentElement::Table(table_md));
114 }
115
116 render_elements(&elements)
118 }
119
120 pub fn render_text(chars: &[Char], options: &MarkdownOptions) -> String {
122 Self::render(chars, &[], options)
123 }
124
125 pub fn table_to_gfm(table: &Table) -> String {
127 table_to_gfm(table)
128 }
129
130 pub fn detect_heading_level(font_size: f64, median_size: f64, min_ratio: f64) -> Option<u8> {
135 detect_heading_level(font_size, median_size, min_ratio)
136 }
137
138 pub fn detect_list_item(text: &str) -> Option<(String, String)> {
143 detect_list_item(text)
144 }
145}
146
147fn compute_median_font_size(chars: &[Char]) -> f64 {
149 if chars.is_empty() {
150 return 12.0; }
152
153 let mut sizes: Vec<f64> = chars
154 .iter()
155 .filter(|c| c.size > 0.0 && !c.text.trim().is_empty())
156 .map(|c| c.size)
157 .collect();
158
159 if sizes.is_empty() {
160 return 12.0;
161 }
162
163 sizes.sort_by(|a, b| a.partial_cmp(b).unwrap());
164 let mid = sizes.len() / 2;
165 if sizes.len() % 2 == 0 {
166 (sizes[mid - 1] + sizes[mid]) / 2.0
167 } else {
168 sizes[mid]
169 }
170}
171
/// Maps a font size to a Markdown heading level relative to the median size.
///
/// The ratio `font_size / median_size` selects the level:
/// `>= 2.0` gives 1, `>= 1.6` gives 2, `>= 1.3` gives 3, and anything else
/// that is at least `min_ratio` gives 4.
///
/// Returns `None` when either size is not positive, when the ratio is below
/// `min_ratio`, or when any input is NaN.
fn detect_heading_level(font_size: f64, median_size: f64, min_ratio: f64) -> Option<u8> {
    if median_size <= 0.0 || font_size <= 0.0 {
        return None;
    }

    let ratio = font_size / median_size;
    // NaN fails every ordered comparison, so it must be rejected explicitly;
    // otherwise it would slip past the threshold test and be reported as level 4.
    if ratio.is_nan() || min_ratio.is_nan() || ratio < min_ratio {
        return None;
    }

    let level = if ratio >= 2.0 {
        1
    } else if ratio >= 1.6 {
        2
    } else if ratio >= 1.3 {
        3
    } else {
        4
    };
    Some(level)
}
198
199fn detect_list_item(text: &str) -> Option<(String, String)> {
201 let trimmed = text.trim_start();
202
203 for prefix in &["- ", "* ", "• ", "– ", "— "] {
205 if let Some(rest) = trimmed.strip_prefix(prefix) {
206 return Some((prefix.to_string(), rest.to_string()));
207 }
208 }
209
210 if let Some(rest) = try_parse_numbered_list(trimmed) {
212 return Some(rest);
213 }
214
215 None
216}
217
/// Recognises a numbered-list marker at the very start of `text`.
///
/// A marker is one or more ASCII digits, then `.` or `)`, then a single
/// space. On success returns the marker (including the trailing space) and
/// the remainder of the text; otherwise returns `None`.
fn try_parse_numbered_list(text: &str) -> Option<(String, String)> {
    let digit_count = text.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }

    // ASCII digits are one byte each, so this split is on a char boundary.
    let after_digits = &text[digit_count..];
    let rest = after_digits
        .strip_prefix(". ")
        .or_else(|| after_digits.strip_prefix(") "))?;

    // Digits plus the two-byte separator-and-space suffix.
    let marker = &text[..digit_count + 2];
    Some((marker.to_string(), rest.to_string()))
}
246
247fn block_dominant_size(block: &TextBlock) -> f64 {
249 let mut sizes: Vec<f64> = Vec::new();
250 for line in &block.lines {
251 for word in &line.words {
252 for ch in &word.chars {
253 if ch.size > 0.0 && !ch.text.trim().is_empty() {
254 sizes.push(ch.size);
255 }
256 }
257 }
258 }
259 if sizes.is_empty() {
260 return 0.0;
261 }
262
263 sizes.sort_by(|a, b| a.partial_cmp(b).unwrap());
265 let mut best_size = sizes[0];
266 let mut best_count = 1;
267 let mut current_count = 1;
268 for i in 1..sizes.len() {
269 if (sizes[i] - sizes[i - 1]).abs() < 0.1 {
270 current_count += 1;
271 } else {
272 if current_count > best_count {
273 best_count = current_count;
274 best_size = sizes[i - 1];
275 }
276 current_count = 1;
277 }
278 }
279 if current_count > best_count {
280 best_size = *sizes.last().unwrap();
281 }
282 best_size
283}
284
/// Reports whether a font name looks like a bold face.
///
/// The check is a case-insensitive substring match on "bold", "heavy" or "black".
fn is_bold_font(fontname: &str) -> bool {
    const BOLD_MARKERS: [&str; 3] = ["bold", "heavy", "black"];

    let normalized = fontname.to_lowercase();
    BOLD_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
290
/// Reports whether a font name looks like an italic face.
///
/// The check is a case-insensitive substring match on "italic" or "oblique".
fn is_italic_font(fontname: &str) -> bool {
    const ITALIC_MARKERS: [&str; 2] = ["italic", "oblique"];

    let normalized = fontname.to_lowercase();
    ITALIC_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
296
297fn word_dominant_font(word: &Word) -> &str {
299 if word.chars.is_empty() {
300 return "";
301 }
302 word.chars
304 .iter()
305 .find(|c| !c.text.trim().is_empty())
306 .map(|c| c.fontname.as_str())
307 .unwrap_or("")
308}
309
310fn classify_blocks(
312 blocks: &[TextBlock],
313 median_size: f64,
314 options: &MarkdownOptions,
315) -> Vec<ContentElement> {
316 let mut elements = Vec::new();
317
318 for block in blocks {
319 let block_text = block_to_text(block);
320 if block_text.trim().is_empty() {
321 continue;
322 }
323
324 let dominant_size = block_dominant_size(block);
325
326 if let Some(level) =
328 detect_heading_level(dominant_size, median_size, options.heading_min_ratio)
329 {
330 let is_short =
332 block.lines.len() <= 2 && block.lines.iter().all(|l| l.words.len() <= 15);
333 if is_short {
334 elements.push(ContentElement::Heading {
335 level,
336 text: block_text.trim().to_string(),
337 });
338 continue;
339 }
340 }
341
342 if options.detect_lists {
344 let line_texts: Vec<String> = block.lines.iter().map(line_to_text).collect();
345
346 let all_list_items = line_texts.iter().all(|t| detect_list_item(t).is_some());
347 if all_list_items && !line_texts.is_empty() {
348 for text in &line_texts {
349 if let Some((prefix, rest)) = detect_list_item(text) {
350 elements.push(ContentElement::ListItem { prefix, text: rest });
351 }
352 }
353 continue;
354 }
355 }
356
357 let rendered_text = if options.detect_emphasis {
359 render_block_with_emphasis(block)
360 } else {
361 block_text
362 };
363
364 elements.push(ContentElement::Paragraph(rendered_text.trim().to_string()));
365 }
366
367 elements
368}
369
370fn block_to_text(block: &TextBlock) -> String {
372 block
373 .lines
374 .iter()
375 .map(line_to_text)
376 .collect::<Vec<_>>()
377 .join("\n")
378}
379
380fn line_to_text(line: &TextLine) -> String {
382 line.words
383 .iter()
384 .map(|w| w.text.as_str())
385 .collect::<Vec<_>>()
386 .join(" ")
387}
388
389fn render_block_with_emphasis(block: &TextBlock) -> String {
391 block
392 .lines
393 .iter()
394 .map(render_line_with_emphasis)
395 .collect::<Vec<_>>()
396 .join("\n")
397}
398
399fn render_line_with_emphasis(line: &TextLine) -> String {
401 let mut parts: Vec<String> = Vec::new();
402
403 for word in &line.words {
404 let font = word_dominant_font(word);
405 let bold = is_bold_font(font);
406 let italic = is_italic_font(font);
407
408 let text = &word.text;
409 if bold && italic {
410 parts.push(format!("***{text}***"));
411 } else if bold {
412 parts.push(format!("**{text}**"));
413 } else if italic {
414 parts.push(format!("*{text}*"));
415 } else {
416 parts.push(text.clone());
417 }
418 }
419
420 parts.join(" ")
421}
422
423fn table_to_gfm(table: &Table) -> String {
425 if table.rows.is_empty() {
426 return String::new();
427 }
428
429 let mut lines = Vec::new();
430
431 for (i, row) in table.rows.iter().enumerate() {
432 let cells: Vec<String> = row
433 .iter()
434 .map(|cell| {
435 cell.text
436 .as_deref()
437 .unwrap_or("")
438 .replace('|', "\\|")
439 .replace('\n', " ")
440 })
441 .collect();
442
443 let line = format!("| {} |", cells.join(" | "));
444 lines.push(line);
445
446 if i == 0 {
448 let sep: Vec<&str> = cells.iter().map(|_| "---").collect();
449 lines.push(format!("| {} |", sep.join(" | ")));
450 }
451 }
452
453 lines.join("\n")
454}
455
456fn render_elements(elements: &[ContentElement]) -> String {
458 let mut parts: Vec<String> = Vec::new();
459
460 for element in elements {
461 match element {
462 ContentElement::Heading { level, text } => {
463 let hashes = "#".repeat(*level as usize);
464 parts.push(format!("{hashes} {text}"));
465 }
466 ContentElement::Paragraph(text) => {
467 parts.push(text.clone());
468 }
469 ContentElement::Table(md) => {
470 parts.push(md.clone());
471 }
472 ContentElement::ListItem { prefix, text } => {
473 let md_prefix = if prefix.starts_with(|c: char| c.is_ascii_digit()) {
475 prefix.clone()
476 } else {
477 "- ".to_string()
478 };
479 parts.push(format!("{md_prefix}{text}"));
480 }
481 }
482 }
483
484 parts.join("\n\n")
485}
486
#[cfg(test)]
mod tests {
    use super::*;
    use crate::geometry::BBox;
    use crate::table::Cell;
    use crate::text::TextDirection;

    // ---- Fixture builders -------------------------------------------------

    /// Builds one upright, left-to-right glyph set in `fontname`.
    fn make_char_with_font(
        text: &str,
        x0: f64,
        top: f64,
        x1: f64,
        bottom: f64,
        size: f64,
        fontname: &str,
    ) -> Char {
        Char {
            text: text.to_string(),
            bbox: BBox::new(x0, top, x1, bottom),
            fontname: fontname.to_string(),
            size,
            doctop: top,
            upright: true,
            direction: TextDirection::Ltr,
            stroking_color: None,
            non_stroking_color: None,
            ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            char_code: 0,
            mcid: None,
            tag: None,
        }
    }

    /// Builds one Helvetica glyph.
    fn make_char(text: &str, x0: f64, top: f64, x1: f64, bottom: f64, size: f64) -> Char {
        make_char_with_font(text, x0, top, x1, bottom, size, "Helvetica")
    }

    /// Lays out `text` as Helvetica glyphs of equal width starting at x = 0.
    fn make_text_run(text: &str, glyph_width: f64, top: f64, bottom: f64, size: f64) -> Vec<Char> {
        text.chars()
            .enumerate()
            .map(|(i, c)| {
                let x0 = i as f64 * glyph_width;
                make_char(&c.to_string(), x0, top, x0 + glyph_width, bottom, size)
            })
            .collect()
    }

    /// Builds a word whose glyphs evenly divide the span `x0..x1`.
    fn make_word_from_text(
        text: &str,
        x0: f64,
        top: f64,
        x1: f64,
        bottom: f64,
        size: f64,
        fontname: &str,
    ) -> Word {
        // Count characters, not bytes, so multi-byte text still fills the word box.
        let glyph_count = text.chars().count();
        let chars: Vec<Char> = text
            .chars()
            .enumerate()
            .map(|(i, c)| {
                let char_width = (x1 - x0) / glyph_count as f64;
                let cx0 = x0 + i as f64 * char_width;
                make_char_with_font(
                    &c.to_string(),
                    cx0,
                    top,
                    cx0 + char_width,
                    bottom,
                    size,
                    fontname,
                )
            })
            .collect();
        Word {
            text: text.to_string(),
            bbox: BBox::new(x0, top, x1, bottom),
            doctop: top,
            direction: TextDirection::Ltr,
            chars,
        }
    }

    /// Builds a table cell with optional text.
    fn make_cell(x0: f64, top: f64, x1: f64, bottom: f64, text: Option<&str>) -> Cell {
        Cell {
            bbox: BBox::new(x0, top, x1, bottom),
            text: text.map(str::to_string),
        }
    }

    /// Builds a table from rows only; `cells` and `columns` stay empty.
    fn make_table(bbox: BBox, rows: Vec<Vec<Cell>>) -> Table {
        Table {
            bbox,
            cells: vec![],
            rows,
            columns: vec![],
        }
    }

    /// Builds a 100x50 table with two rows of two cells each.
    fn two_by_two_table(texts: [[Option<&str>; 2]; 2]) -> Table {
        make_table(
            BBox::new(0.0, 0.0, 100.0, 50.0),
            vec![
                vec![
                    make_cell(0.0, 0.0, 50.0, 25.0, texts[0][0]),
                    make_cell(50.0, 0.0, 100.0, 25.0, texts[0][1]),
                ],
                vec![
                    make_cell(0.0, 25.0, 50.0, 50.0, texts[1][0]),
                    make_cell(50.0, 25.0, 100.0, 50.0, texts[1][1]),
                ],
            ],
        )
    }

    /// Builds a 100x50 table with a single column and two rows.
    fn single_column_table(header: &str, body: &str) -> Table {
        make_table(
            BBox::new(0.0, 0.0, 100.0, 50.0),
            vec![
                vec![make_cell(0.0, 0.0, 100.0, 25.0, Some(header))],
                vec![make_cell(0.0, 25.0, 100.0, 50.0, Some(body))],
            ],
        )
    }

    // ---- Heading detection ------------------------------------------------

    #[test]
    fn test_detect_heading_h1() {
        assert_eq!(detect_heading_level(24.0, 12.0, 1.2), Some(1));
    }

    #[test]
    fn test_detect_heading_h2() {
        assert_eq!(detect_heading_level(20.0, 12.0, 1.2), Some(2));
    }

    #[test]
    fn test_detect_heading_h3() {
        assert_eq!(detect_heading_level(16.0, 12.0, 1.2), Some(3));
    }

    #[test]
    fn test_detect_heading_h4() {
        assert_eq!(detect_heading_level(14.5, 12.0, 1.2), Some(4));
    }

    #[test]
    fn test_detect_no_heading_normal_size() {
        assert_eq!(detect_heading_level(12.0, 12.0, 1.2), None);
    }

    #[test]
    fn test_detect_heading_zero_median() {
        assert_eq!(detect_heading_level(12.0, 0.0, 1.2), None);
    }

    #[test]
    fn test_detect_heading_zero_font_size() {
        assert_eq!(detect_heading_level(0.0, 12.0, 1.2), None);
    }

    // ---- List detection ---------------------------------------------------

    #[test]
    fn test_detect_bullet_dash() {
        let result = detect_list_item("- item text");
        assert_eq!(result, Some(("- ".to_string(), "item text".to_string())));
    }

    #[test]
    fn test_detect_bullet_asterisk() {
        let result = detect_list_item("* item text");
        assert_eq!(result, Some(("* ".to_string(), "item text".to_string())));
    }

    #[test]
    fn test_detect_bullet_unicode() {
        let result = detect_list_item("• item text");
        assert_eq!(result, Some(("• ".to_string(), "item text".to_string())));
    }

    #[test]
    fn test_detect_numbered_list_dot() {
        let result = detect_list_item("1. first item");
        assert_eq!(result, Some(("1. ".to_string(), "first item".to_string())));
    }

    #[test]
    fn test_detect_numbered_list_paren() {
        let result = detect_list_item("2) second item");
        assert_eq!(result, Some(("2) ".to_string(), "second item".to_string())));
    }

    #[test]
    fn test_detect_no_list_normal_text() {
        assert_eq!(detect_list_item("Just normal text"), None);
    }

    #[test]
    fn test_detect_no_list_empty() {
        assert_eq!(detect_list_item(""), None);
    }

    // ---- Median font size -------------------------------------------------

    #[test]
    fn test_median_font_size_empty() {
        assert_eq!(compute_median_font_size(&[]), 12.0);
    }

    #[test]
    fn test_median_font_size_single() {
        let chars = vec![make_char("A", 0.0, 0.0, 10.0, 12.0, 14.0)];
        assert_eq!(compute_median_font_size(&chars), 14.0);
    }

    #[test]
    fn test_median_font_size_odd_count() {
        let chars = vec![
            make_char("A", 0.0, 0.0, 10.0, 12.0, 10.0),
            make_char("B", 10.0, 0.0, 20.0, 12.0, 12.0),
            make_char("C", 20.0, 0.0, 30.0, 12.0, 14.0),
        ];
        assert_eq!(compute_median_font_size(&chars), 12.0);
    }

    #[test]
    fn test_median_font_size_even_count() {
        let chars = vec![
            make_char("A", 0.0, 0.0, 10.0, 12.0, 10.0),
            make_char("B", 10.0, 0.0, 20.0, 12.0, 14.0),
        ];
        assert_eq!(compute_median_font_size(&chars), 12.0);
    }

    #[test]
    fn test_median_font_size_ignores_zero_size() {
        let chars = vec![
            make_char("A", 0.0, 0.0, 10.0, 12.0, 0.0),
            make_char("B", 10.0, 0.0, 20.0, 12.0, 12.0),
            make_char("C", 20.0, 0.0, 30.0, 12.0, 14.0),
        ];
        assert_eq!(compute_median_font_size(&chars), 13.0);
    }

    // ---- Table rendering --------------------------------------------------

    #[test]
    fn test_table_to_gfm_simple() {
        let table = two_by_two_table([
            [Some("Name"), Some("Age")],
            [Some("Alice"), Some("30")],
        ]);
        let gfm = table_to_gfm(&table);
        assert_eq!(gfm, "| Name | Age |\n| --- | --- |\n| Alice | 30 |");
    }

    #[test]
    fn test_table_to_gfm_with_none_cells() {
        let table = two_by_two_table([[Some("Header"), None], [None, Some("Data")]]);
        let gfm = table_to_gfm(&table);
        assert_eq!(gfm, "| Header |  |\n| --- | --- |\n|  | Data |");
    }

    #[test]
    fn test_table_to_gfm_empty_rows() {
        let table = make_table(BBox::new(0.0, 0.0, 100.0, 50.0), vec![]);
        assert_eq!(table_to_gfm(&table), "");
    }

    #[test]
    fn test_table_to_gfm_escapes_pipe() {
        let table = single_column_table("A|B", "C");
        let gfm = table_to_gfm(&table);
        assert!(gfm.contains("A\\|B"));
    }

    // ---- End-to-end rendering ---------------------------------------------

    #[test]
    fn test_render_simple_paragraph() {
        // (text, x0, x1): glyphs are 8 wide except the 4-wide space.
        let glyphs = [
            ("H", 0.0, 8.0),
            ("e", 8.0, 16.0),
            ("l", 16.0, 24.0),
            ("l", 24.0, 32.0),
            ("o", 32.0, 40.0),
            (" ", 40.0, 44.0),
            ("W", 44.0, 52.0),
            ("o", 52.0, 60.0),
            ("r", 60.0, 68.0),
            ("l", 68.0, 76.0),
            ("d", 76.0, 84.0),
        ];
        let chars: Vec<Char> = glyphs
            .iter()
            .map(|&(text, x0, x1)| make_char(text, x0, 0.0, x1, 12.0, 12.0))
            .collect();
        let result = MarkdownRenderer::render_text(&chars, &MarkdownOptions::default());
        assert_eq!(result.trim(), "Hello World");
    }

    #[test]
    fn test_render_heading_detection() {
        // A 24pt title line followed by a 12pt body line further down the page.
        let mut chars = make_text_run("Title", 16.0, 0.0, 24.0, 24.0);
        chars.extend(make_text_run("Body text here", 8.0, 40.0, 52.0, 12.0));

        let result = MarkdownRenderer::render_text(&chars, &MarkdownOptions::default());
        assert!(
            result.contains("# Title"),
            "Expected H1 heading, got: {result}"
        );
        assert!(
            result.contains("Body text here"),
            "Expected body text, got: {result}"
        );
    }

    #[test]
    fn test_render_empty_input() {
        let result = MarkdownRenderer::render(&[], &[], &MarkdownOptions::default());
        assert_eq!(result, "");
    }

    // ---- Emphasis ---------------------------------------------------------

    #[test]
    fn test_bold_font_detection() {
        assert!(is_bold_font("Helvetica-Bold"));
        assert!(is_bold_font("TimesNewRoman-BoldItalic"));
        assert!(!is_bold_font("Helvetica"));
        assert!(!is_bold_font("Times-Roman"));
    }

    #[test]
    fn test_italic_font_detection() {
        assert!(is_italic_font("Helvetica-Oblique"));
        assert!(is_italic_font("Times-Italic"));
        assert!(!is_italic_font("Helvetica"));
        assert!(!is_italic_font("Helvetica-Bold"));
    }

    #[test]
    fn test_render_with_emphasis() {
        let line = TextLine {
            words: vec![
                make_word_from_text("normal", 0.0, 0.0, 48.0, 12.0, 12.0, "Helvetica"),
                make_word_from_text("bold", 52.0, 0.0, 88.0, 12.0, 12.0, "Helvetica-Bold"),
                make_word_from_text("italic", 92.0, 0.0, 140.0, 12.0, 12.0, "Helvetica-Oblique"),
            ],
            bbox: BBox::new(0.0, 0.0, 140.0, 12.0),
        };
        let result = render_line_with_emphasis(&line);
        assert_eq!(result, "normal **bold** *italic*");
    }

    // ---- Options ----------------------------------------------------------

    #[test]
    fn test_markdown_options_default() {
        let opts = MarkdownOptions::default();
        assert_eq!(opts.y_tolerance, 3.0);
        assert_eq!(opts.y_density, 10.0);
        assert_eq!(opts.x_density, 10.0);
        assert_eq!(opts.heading_min_ratio, 1.2);
        assert!(opts.detect_lists);
        assert!(opts.detect_emphasis);
    }

    // ---- Tables through the renderer --------------------------------------

    #[test]
    fn test_render_with_table() {
        let table = two_by_two_table([[Some("Col1"), Some("Col2")], [Some("A"), Some("B")]]);
        let result = MarkdownRenderer::render(&[], &[table], &MarkdownOptions::default());
        assert!(result.contains("| Col1 | Col2 |"));
        assert!(result.contains("| --- | --- |"));
        assert!(result.contains("| A | B |"));
    }

    #[test]
    fn test_table_to_gfm_single_row() {
        let table = make_table(
            BBox::new(0.0, 0.0, 100.0, 25.0),
            vec![vec![
                make_cell(0.0, 0.0, 50.0, 25.0, Some("Only")),
                make_cell(50.0, 0.0, 100.0, 25.0, Some("Row")),
            ]],
        );
        let gfm = table_to_gfm(&table);
        // A lone row is still treated as the header, so the delimiter follows it.
        assert_eq!(gfm, "| Only | Row |\n| --- | --- |");
    }

    // ---- Lists through the renderer ---------------------------------------

    #[test]
    fn test_render_list_items() {
        let mut chars = make_text_run("- first item", 8.0, 0.0, 12.0, 12.0);
        chars.extend(make_text_run("- second item", 8.0, 15.0, 27.0, 12.0));

        let result = MarkdownRenderer::render_text(&chars, &MarkdownOptions::default());
        assert!(
            result.contains("- first item"),
            "Expected first list item, got: {result}"
        );
        assert!(
            result.contains("- second item"),
            "Expected second list item, got: {result}"
        );
    }

    #[test]
    fn test_detect_numbered_list_multi_digit() {
        let result = detect_list_item("12. twelfth item");
        assert_eq!(
            result,
            Some(("12. ".to_string(), "twelfth item".to_string()))
        );
    }

    // ---- Block helpers ----------------------------------------------------

    #[test]
    fn test_block_dominant_size() {
        let block = TextBlock {
            lines: vec![TextLine {
                words: vec![make_word_from_text(
                    "Hello",
                    0.0,
                    0.0,
                    40.0,
                    12.0,
                    14.0,
                    "Helvetica",
                )],
                bbox: BBox::new(0.0, 0.0, 40.0, 12.0),
            }],
            bbox: BBox::new(0.0, 0.0, 40.0, 12.0),
        };
        assert_eq!(block_dominant_size(&block), 14.0);
    }

    // ---- Element serialisation --------------------------------------------

    #[test]
    fn test_render_elements_heading_and_paragraph() {
        let elements = vec![
            ContentElement::Heading {
                level: 1,
                text: "My Title".to_string(),
            },
            ContentElement::Paragraph("Some body text.".to_string()),
        ];
        let result = render_elements(&elements);
        assert_eq!(result, "# My Title\n\nSome body text.");
    }

    #[test]
    fn test_render_elements_list() {
        let elements = vec![
            ContentElement::ListItem {
                prefix: "- ".to_string(),
                text: "first".to_string(),
            },
            ContentElement::ListItem {
                prefix: "- ".to_string(),
                text: "second".to_string(),
            },
        ];
        let result = render_elements(&elements);
        assert_eq!(result, "- first\n\n- second");
    }

    #[test]
    fn test_render_elements_numbered_list() {
        let elements = vec![
            ContentElement::ListItem {
                prefix: "1. ".to_string(),
                text: "first".to_string(),
            },
            ContentElement::ListItem {
                prefix: "2. ".to_string(),
                text: "second".to_string(),
            },
        ];
        let result = render_elements(&elements);
        assert_eq!(result, "1. first\n\n2. second");
    }

    #[test]
    fn test_table_to_gfm_newline_in_cell() {
        let table = single_column_table("Header", "Line1\nLine2");
        let gfm = table_to_gfm(&table);
        assert!(gfm.contains("Line1 Line2"));
        // Header, delimiter and one body row: the embedded newline must not add a line.
        let gfm_lines: Vec<&str> = gfm.lines().collect();
        assert_eq!(gfm_lines.len(), 3);
    }
}