1use crate::layout::{
7 TextBlock, TextLine, cluster_lines_into_blocks, cluster_words_into_lines,
8 sort_blocks_reading_order, split_lines_at_columns,
9};
10use crate::table::Table;
11use crate::text::Char;
12use crate::words::{Word, WordExtractor, WordOptions};
13
/// Tuning knobs for HTML rendering.
///
/// The three layout values are forwarded unchanged to the word-extraction and
/// layout-clustering helpers; their precise geometric meaning is defined by
/// those helpers. `PartialEq` is derived so option sets can be compared
/// directly (for example in tests or when caching rendered output).
#[derive(Debug, Clone, PartialEq)]
pub struct HtmlOptions {
    /// Vertical tolerance handed to word extraction and to
    /// `cluster_words_into_lines`.
    pub y_tolerance: f64,
    /// Value handed to `cluster_lines_into_blocks` when lines are grouped
    /// into blocks.
    pub y_density: f64,
    /// Value handed to `split_lines_at_columns` and
    /// `sort_blocks_reading_order`.
    pub x_density: f64,
    /// Smallest ratio between a block's font size and the median font size
    /// for the block to be considered a heading.
    pub heading_min_ratio: f64,
    /// When `true`, a block whose every line looks like a list item is
    /// rendered as list items instead of a paragraph.
    pub detect_lists: bool,
    /// When `true`, paragraph words set in bold / italic fonts are wrapped in
    /// `<strong>` / `<em>`.
    pub detect_emphasis: bool,
}
30
31impl Default for HtmlOptions {
32 fn default() -> Self {
33 Self {
34 y_tolerance: 3.0,
35 y_density: 10.0,
36 x_density: 10.0,
37 heading_min_ratio: 1.2,
38 detect_lists: true,
39 detect_emphasis: true,
40 }
41 }
42}
43
/// Intermediate representation of one piece of rendered output.
///
/// All text payloads are stored already HTML-escaped (or already marked up);
/// the final rendering step inserts them verbatim.
#[derive(Debug, Clone, PartialEq)]
enum HtmlElement {
    /// A heading; `level` is used directly as the `N` in `<hN>`.
    Heading { level: u8, text: String },
    /// Body text emitted inside `<p>`.
    Paragraph(String),
    /// Complete, pre-rendered table markup.
    Table(String),
    /// One list entry; consecutive entries sharing the same `ordered` flag
    /// are wrapped in a single `<ol>` / `<ul>`.
    ListItem { ordered: bool, text: String },
}
61
62pub struct HtmlRenderer;
64
65impl HtmlRenderer {
66 pub fn render(chars: &[Char], tables: &[Table], options: &HtmlOptions) -> String {
75 if chars.is_empty() && tables.is_empty() {
76 return String::new();
77 }
78
79 let words = WordExtractor::extract(
80 chars,
81 &WordOptions {
82 y_tolerance: options.y_tolerance,
83 ..WordOptions::default()
84 },
85 );
86
87 let lines = cluster_words_into_lines(&words, options.y_tolerance);
88 let split = split_lines_at_columns(lines, options.x_density);
89 let mut blocks = cluster_lines_into_blocks(split, options.y_density);
90 sort_blocks_reading_order(&mut blocks, options.x_density);
91
92 let median_size = compute_median_font_size(chars);
93
94 let mut elements = classify_blocks(&blocks, median_size, options);
95
96 for table in tables {
98 let table_html = table_to_html(table);
99 elements.push(HtmlElement::Table(table_html));
100 }
101
102 render_elements(&elements)
103 }
104
105 pub fn render_text(chars: &[Char], options: &HtmlOptions) -> String {
107 Self::render(chars, &[], options)
108 }
109
110 pub fn table_to_html(table: &Table) -> String {
112 table_to_html(table)
113 }
114
115 pub fn detect_heading_level(font_size: f64, median_size: f64, min_ratio: f64) -> Option<u8> {
120 detect_heading_level(font_size, median_size, min_ratio)
121 }
122}
123
124fn compute_median_font_size(chars: &[Char]) -> f64 {
126 if chars.is_empty() {
127 return 12.0;
128 }
129
130 let mut sizes: Vec<f64> = chars
131 .iter()
132 .filter(|c| c.size > 0.0 && !c.text.trim().is_empty())
133 .map(|c| c.size)
134 .collect();
135
136 if sizes.is_empty() {
137 return 12.0;
138 }
139
140 sizes.sort_by(|a, b| a.partial_cmp(b).unwrap());
141 let mid = sizes.len() / 2;
142 if sizes.len() % 2 == 0 {
143 (sizes[mid - 1] + sizes[mid]) / 2.0
144 } else {
145 sizes[mid]
146 }
147}
148
/// Maps a font size to a heading level based on its ratio to `median_size`.
///
/// Returns `None` when either size is not positive, when the ratio is below
/// `min_ratio`, or when any input (or the resulting ratio) is NaN. Otherwise:
///
/// | ratio   | level |
/// |---------|-------|
/// | >= 2.0  | 1     |
/// | >= 1.6  | 2     |
/// | >= 1.3  | 3     |
/// | below   | 4     |
fn detect_heading_level(font_size: f64, median_size: f64, min_ratio: f64) -> Option<u8> {
    if median_size <= 0.0 || font_size <= 0.0 {
        return None;
    }

    let ratio = font_size / median_size;

    // Every comparison against NaN is false, so without the explicit checks a
    // NaN size or threshold would slip past the guards and be reported as a
    // level-4 heading.
    if ratio.is_nan() || min_ratio.is_nan() || ratio < min_ratio {
        return None;
    }

    let level = if ratio >= 2.0 {
        1
    } else if ratio >= 1.6 {
        2
    } else if ratio >= 1.3 {
        3
    } else {
        4
    };
    Some(level)
}
170
/// Recognises a list marker at the start of `text` (leading whitespace is
/// ignored).
///
/// Returns `Some((ordered, rest))` where `rest` is the text after the marker:
/// * unordered markers: `-`, `*`, a bullet (U+2022), an en dash (U+2013) or an
///   em dash (U+2014), each followed by a space;
/// * ordered markers: one or more ASCII digits followed by `.` or `)` and a
///   space.
///
/// Returns `None` when no marker is present.
fn detect_list_item(text: &str) -> Option<(bool, String)> {
    const BULLET_MARKERS: [&str; 5] = ["- ", "* ", "\u{2022} ", "\u{2013} ", "\u{2014} "];

    let candidate = text.trim_start();

    let bullet_body = BULLET_MARKERS
        .iter()
        .find_map(|marker| candidate.strip_prefix(*marker));
    if let Some(body) = bullet_body {
        return Some((false, body.to_string()));
    }

    // Length of the leading run of ASCII digits. Digits are single bytes, so
    // this is also a valid char boundary for slicing.
    let digit_len = candidate
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    if digit_len == 0 {
        return None;
    }

    let after_number = &candidate[digit_len..];
    after_number
        .strip_prefix(". ")
        .or_else(|| after_number.strip_prefix(") "))
        .map(|body| (true, body.to_string()))
}
201
202fn block_dominant_size(block: &TextBlock) -> f64 {
204 let mut sizes: Vec<f64> = Vec::new();
205 for line in &block.lines {
206 for word in &line.words {
207 for ch in &word.chars {
208 if ch.size > 0.0 && !ch.text.trim().is_empty() {
209 sizes.push(ch.size);
210 }
211 }
212 }
213 }
214 if sizes.is_empty() {
215 return 0.0;
216 }
217
218 sizes.sort_by(|a, b| a.partial_cmp(b).unwrap());
219 let mut best_size = sizes[0];
220 let mut best_count = 1;
221 let mut current_count = 1;
222 for i in 1..sizes.len() {
223 if (sizes[i] - sizes[i - 1]).abs() < 0.1 {
224 current_count += 1;
225 } else {
226 if current_count > best_count {
227 best_count = current_count;
228 best_size = sizes[i - 1];
229 }
230 current_count = 1;
231 }
232 }
233 if current_count > best_count {
234 best_size = *sizes.last().unwrap();
235 }
236 best_size
237}
238
/// Reports whether a font name suggests a bold face, i.e. whether it contains
/// "bold", "heavy" or "black" (case-insensitive).
fn is_bold_font(fontname: &str) -> bool {
    const BOLD_HINTS: [&str; 3] = ["bold", "heavy", "black"];

    let normalized = fontname.to_lowercase();
    BOLD_HINTS.iter().any(|hint| normalized.contains(*hint))
}
244
/// Reports whether a font name suggests an italic face, i.e. whether it
/// contains "italic" or "oblique" (case-insensitive).
fn is_italic_font(fontname: &str) -> bool {
    const ITALIC_HINTS: [&str; 2] = ["italic", "oblique"];

    let normalized = fontname.to_lowercase();
    ITALIC_HINTS.iter().any(|hint| normalized.contains(*hint))
}
250
251fn word_dominant_font(word: &Word) -> &str {
253 word.chars
254 .iter()
255 .find(|c| !c.text.trim().is_empty())
256 .map(|c| c.fontname.as_str())
257 .unwrap_or("")
258}
259
/// Escapes the characters that are significant in HTML text and
/// double-quoted attribute values.
///
/// `&`, `<`, `>` and `"` are replaced by `&amp;`, `&lt;`, `&gt;` and
/// `&quot;`; every other character is copied unchanged. The input is walked
/// once, so an ampersand that is already part of an entity is escaped again
/// (`&amp;` becomes `&amp;amp;`) rather than being treated specially.
fn escape_html(text: &str) -> String {
    // The output is at least as long as the input.
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
267
268fn classify_blocks(
270 blocks: &[TextBlock],
271 median_size: f64,
272 options: &HtmlOptions,
273) -> Vec<HtmlElement> {
274 let mut elements = Vec::new();
275
276 for block in blocks {
277 let block_text = block_to_text(block);
278 if block_text.trim().is_empty() {
279 continue;
280 }
281
282 let dominant_size = block_dominant_size(block);
283
284 if let Some(level) =
286 detect_heading_level(dominant_size, median_size, options.heading_min_ratio)
287 {
288 let is_short =
289 block.lines.len() <= 2 && block.lines.iter().all(|l| l.words.len() <= 15);
290 if is_short {
291 let text = escape_html(block_text.trim());
292 elements.push(HtmlElement::Heading { level, text });
293 continue;
294 }
295 }
296
297 if options.detect_lists {
299 let line_texts: Vec<String> = block.lines.iter().map(line_to_text).collect();
300 let all_list_items = line_texts.iter().all(|t| detect_list_item(t).is_some());
301 if all_list_items && !line_texts.is_empty() {
302 for text in &line_texts {
303 if let Some((ordered, rest)) = detect_list_item(text) {
304 elements.push(HtmlElement::ListItem {
305 ordered,
306 text: escape_html(&rest),
307 });
308 }
309 }
310 continue;
311 }
312 }
313
314 let rendered_text = if options.detect_emphasis {
316 render_block_with_emphasis(block)
317 } else {
318 escape_html(&block_text)
319 };
320
321 elements.push(HtmlElement::Paragraph(rendered_text.trim().to_string()));
322 }
323
324 elements
325}
326
327fn block_to_text(block: &TextBlock) -> String {
329 block
330 .lines
331 .iter()
332 .map(line_to_text)
333 .collect::<Vec<_>>()
334 .join("\n")
335}
336
337fn line_to_text(line: &TextLine) -> String {
339 line.words
340 .iter()
341 .map(|w| w.text.as_str())
342 .collect::<Vec<_>>()
343 .join(" ")
344}
345
346fn render_block_with_emphasis(block: &TextBlock) -> String {
348 block
349 .lines
350 .iter()
351 .map(render_line_with_emphasis)
352 .collect::<Vec<_>>()
353 .join("\n")
354}
355
356fn render_line_with_emphasis(line: &TextLine) -> String {
358 let mut parts: Vec<String> = Vec::new();
359
360 for word in &line.words {
361 let font = word_dominant_font(word);
362 let bold = is_bold_font(font);
363 let italic = is_italic_font(font);
364 let text = escape_html(&word.text);
365
366 if bold && italic {
367 parts.push(format!("<strong><em>{text}</em></strong>"));
368 } else if bold {
369 parts.push(format!("<strong>{text}</strong>"));
370 } else if italic {
371 parts.push(format!("<em>{text}</em>"));
372 } else {
373 parts.push(text);
374 }
375 }
376
377 parts.join(" ")
378}
379
380fn table_to_html(table: &Table) -> String {
382 if table.rows.is_empty() {
383 return String::new();
384 }
385
386 let mut html = String::from("<table>\n");
387
388 for (i, row) in table.rows.iter().enumerate() {
389 if i == 0 {
390 html.push_str("<thead>\n<tr>");
391 for cell in row {
392 let text = escape_html(cell.text.as_deref().unwrap_or(""));
393 html.push_str(&format!("<th>{text}</th>"));
394 }
395 html.push_str("</tr>\n</thead>\n<tbody>\n");
396 } else {
397 html.push_str("<tr>");
398 for cell in row {
399 let text = escape_html(cell.text.as_deref().unwrap_or(""));
400 html.push_str(&format!("<td>{text}</td>"));
401 }
402 html.push_str("</tr>\n");
403 }
404 }
405
406 html.push_str("</tbody>\n</table>");
407 html
408}
409
410fn render_elements(elements: &[HtmlElement]) -> String {
412 let mut parts: Vec<String> = Vec::new();
413 let mut i = 0;
414
415 while i < elements.len() {
416 match &elements[i] {
417 HtmlElement::Heading { level, text } => {
418 parts.push(format!("<h{level}>{text}</h{level}>"));
419 i += 1;
420 }
421 HtmlElement::Paragraph(text) => {
422 parts.push(format!("<p>{text}</p>"));
423 i += 1;
424 }
425 HtmlElement::Table(html) => {
426 parts.push(html.clone());
427 i += 1;
428 }
429 HtmlElement::ListItem { ordered, .. } => {
430 let is_ordered = *ordered;
432 let tag = if is_ordered { "ol" } else { "ul" };
433 let mut items = Vec::new();
434 while i < elements.len() {
435 if let HtmlElement::ListItem { ordered, text } = &elements[i] {
436 if *ordered == is_ordered {
437 items.push(format!("<li>{text}</li>"));
438 i += 1;
439 } else {
440 break;
441 }
442 } else {
443 break;
444 }
445 }
446 parts.push(format!("<{tag}>\n{}\n</{tag}>", items.join("\n")));
447 }
448 }
449 }
450
451 parts.join("\n")
452}
453
#[cfg(test)]
mod tests {
    use super::*;
    use crate::geometry::BBox;
    use crate::table::Cell;
    use crate::text::TextDirection;

    /// Builds an upright, left-to-right Helvetica character with the given
    /// text, bounding box and font size.
    fn make_char(text: &str, x0: f64, top: f64, x1: f64, bottom: f64, size: f64) -> Char {
        Char {
            text: text.to_string(),
            bbox: BBox::new(x0, top, x1, bottom),
            fontname: "Helvetica".to_string(),
            size,
            doctop: top,
            upright: true,
            direction: TextDirection::Ltr,
            stroking_color: None,
            non_stroking_color: None,
            ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            char_code: 0,
            mcid: None,
            tag: None,
        }
    }

    /// Builds a word whose characters evenly divide the horizontal extent
    /// `x0..x1` and all share `size` and `fontname`.
    fn make_word_from_text(
        text: &str,
        x0: f64,
        top: f64,
        x1: f64,
        bottom: f64,
        size: f64,
        fontname: &str,
    ) -> Word {
        // Divide by the number of characters (not bytes) so multi-byte text
        // still spans exactly x0..x1.
        let char_count = text.chars().count().max(1);
        let char_width = (x1 - x0) / char_count as f64;
        let chars: Vec<Char> = text
            .chars()
            .enumerate()
            .map(|(i, c)| {
                let cx0 = x0 + i as f64 * char_width;
                let cx1 = cx0 + char_width;
                Char {
                    text: c.to_string(),
                    bbox: BBox::new(cx0, top, cx1, bottom),
                    fontname: fontname.to_string(),
                    size,
                    doctop: top,
                    upright: true,
                    direction: TextDirection::Ltr,
                    stroking_color: None,
                    non_stroking_color: None,
                    ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                    char_code: 0,
                    mcid: None,
                    tag: None,
                }
            })
            .collect();
        Word {
            text: text.to_string(),
            bbox: BBox::new(x0, top, x1, bottom),
            doctop: top,
            direction: TextDirection::Ltr,
            chars,
        }
    }

    // --- Heading level detection ---

    #[test]
    fn test_heading_h1() {
        assert_eq!(detect_heading_level(24.0, 12.0, 1.2), Some(1));
    }

    #[test]
    fn test_heading_h2() {
        assert_eq!(detect_heading_level(20.0, 12.0, 1.2), Some(2));
    }

    #[test]
    fn test_heading_h3() {
        assert_eq!(detect_heading_level(16.0, 12.0, 1.2), Some(3));
    }

    #[test]
    fn test_heading_h4() {
        assert_eq!(detect_heading_level(14.5, 12.0, 1.2), Some(4));
    }

    #[test]
    fn test_no_heading_normal_size() {
        assert_eq!(detect_heading_level(12.0, 12.0, 1.2), None);
    }

    #[test]
    fn test_heading_zero_median() {
        assert_eq!(detect_heading_level(12.0, 0.0, 1.2), None);
    }

    // --- HTML escaping ---

    #[test]
    fn test_escape_html_ampersand() {
        assert_eq!(escape_html("A & B"), "A &amp; B");
    }

    #[test]
    fn test_escape_html_angle_brackets() {
        assert_eq!(escape_html("<div>"), "&lt;div&gt;");
    }

    #[test]
    fn test_escape_html_quotes() {
        assert_eq!(escape_html("say \"hello\""), "say &quot;hello&quot;");
    }

    #[test]
    fn test_escape_html_combined() {
        assert_eq!(escape_html("a < b & c > d"), "a &lt; b &amp; c &gt; d");
    }

    // --- Table rendering ---

    #[test]
    fn test_table_to_html_simple() {
        let table = Table {
            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
            cells: vec![],
            rows: vec![
                vec![
                    Cell {
                        bbox: BBox::new(0.0, 0.0, 50.0, 25.0),
                        text: Some("Name".to_string()),
                    },
                    Cell {
                        bbox: BBox::new(50.0, 0.0, 100.0, 25.0),
                        text: Some("Age".to_string()),
                    },
                ],
                vec![
                    Cell {
                        bbox: BBox::new(0.0, 25.0, 50.0, 50.0),
                        text: Some("Alice".to_string()),
                    },
                    Cell {
                        bbox: BBox::new(50.0, 25.0, 100.0, 50.0),
                        text: Some("30".to_string()),
                    },
                ],
            ],
            columns: vec![],
        };
        let html = table_to_html(&table);
        assert!(html.contains("<table>"));
        assert!(html.contains("<thead>"));
        assert!(html.contains("<th>Name</th>"));
        assert!(html.contains("<th>Age</th>"));
        assert!(html.contains("</thead>"));
        assert!(html.contains("<tbody>"));
        assert!(html.contains("<td>Alice</td>"));
        assert!(html.contains("<td>30</td>"));
        assert!(html.contains("</tbody>"));
        assert!(html.contains("</table>"));
    }

    #[test]
    fn test_table_to_html_with_none_cells() {
        let table = Table {
            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
            cells: vec![],
            rows: vec![
                vec![
                    Cell {
                        bbox: BBox::new(0.0, 0.0, 50.0, 25.0),
                        text: Some("Header".to_string()),
                    },
                    Cell {
                        bbox: BBox::new(50.0, 0.0, 100.0, 25.0),
                        text: None,
                    },
                ],
                vec![
                    Cell {
                        bbox: BBox::new(0.0, 25.0, 50.0, 50.0),
                        text: None,
                    },
                    Cell {
                        bbox: BBox::new(50.0, 25.0, 100.0, 50.0),
                        text: Some("Data".to_string()),
                    },
                ],
            ],
            columns: vec![],
        };
        let html = table_to_html(&table);
        assert!(html.contains("<th>Header</th>"));
        assert!(html.contains("<th></th>"));
        assert!(html.contains("<td></td>"));
        assert!(html.contains("<td>Data</td>"));
    }

    #[test]
    fn test_table_to_html_empty() {
        let table = Table {
            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
            cells: vec![],
            rows: vec![],
            columns: vec![],
        };
        assert_eq!(table_to_html(&table), "");
    }

    #[test]
    fn test_table_to_html_escapes_html() {
        let table = Table {
            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
            cells: vec![],
            rows: vec![
                vec![Cell {
                    bbox: BBox::new(0.0, 0.0, 100.0, 25.0),
                    text: Some("A<B>".to_string()),
                }],
                vec![Cell {
                    bbox: BBox::new(0.0, 25.0, 100.0, 50.0),
                    text: Some("C&D".to_string()),
                }],
            ],
            columns: vec![],
        };
        let html = table_to_html(&table);
        assert!(html.contains("A&lt;B&gt;"));
        assert!(html.contains("C&amp;D"));
    }

    // --- End-to-end rendering ---

    #[test]
    fn test_render_simple_paragraph() {
        let chars = vec![
            make_char("H", 0.0, 0.0, 8.0, 12.0, 12.0),
            make_char("e", 8.0, 0.0, 16.0, 12.0, 12.0),
            make_char("l", 16.0, 0.0, 24.0, 12.0, 12.0),
            make_char("l", 24.0, 0.0, 32.0, 12.0, 12.0),
            make_char("o", 32.0, 0.0, 40.0, 12.0, 12.0),
            make_char(" ", 40.0, 0.0, 44.0, 12.0, 12.0),
            make_char("W", 44.0, 0.0, 52.0, 12.0, 12.0),
            make_char("o", 52.0, 0.0, 60.0, 12.0, 12.0),
            make_char("r", 60.0, 0.0, 68.0, 12.0, 12.0),
            make_char("l", 68.0, 0.0, 76.0, 12.0, 12.0),
            make_char("d", 76.0, 0.0, 84.0, 12.0, 12.0),
        ];
        let result = HtmlRenderer::render_text(&chars, &HtmlOptions::default());
        assert!(
            result.contains("<p>Hello World</p>"),
            "Expected paragraph wrapping, got: {result}"
        );
    }

    #[test]
    fn test_render_heading_detection() {
        let mut chars = Vec::new();
        // Heading line: 24pt glyphs, twice the body size.
        for (i, c) in "Title".chars().enumerate() {
            chars.push(make_char(
                &c.to_string(),
                i as f64 * 16.0,
                0.0,
                (i + 1) as f64 * 16.0,
                24.0,
                24.0,
            ));
        }
        // Body line: 12pt glyphs placed well below the heading.
        for (i, c) in "Body text here".chars().enumerate() {
            let x0 = i as f64 * 8.0;
            chars.push(make_char(&c.to_string(), x0, 40.0, x0 + 8.0, 52.0, 12.0));
        }
        let result = HtmlRenderer::render_text(&chars, &HtmlOptions::default());
        assert!(
            result.contains("<h1>Title</h1>"),
            "Expected H1 heading, got: {result}"
        );
        assert!(
            result.contains("Body text here"),
            "Expected body text, got: {result}"
        );
    }

    #[test]
    fn test_render_empty_input() {
        let result = HtmlRenderer::render(&[], &[], &HtmlOptions::default());
        assert_eq!(result, "");
    }

    // --- Emphasis detection ---

    #[test]
    fn test_bold_font_detection() {
        assert!(is_bold_font("Helvetica-Bold"));
        assert!(is_bold_font("TimesNewRoman-BoldItalic"));
        assert!(!is_bold_font("Helvetica"));
        assert!(!is_bold_font("Times-Roman"));
    }

    #[test]
    fn test_italic_font_detection() {
        assert!(is_italic_font("Helvetica-Oblique"));
        assert!(is_italic_font("Times-Italic"));
        assert!(!is_italic_font("Helvetica"));
        assert!(!is_italic_font("Helvetica-Bold"));
    }

    #[test]
    fn test_render_line_with_emphasis() {
        let line = TextLine {
            words: vec![
                make_word_from_text("normal", 0.0, 0.0, 48.0, 12.0, 12.0, "Helvetica"),
                make_word_from_text("bold", 52.0, 0.0, 88.0, 12.0, 12.0, "Helvetica-Bold"),
                make_word_from_text("italic", 92.0, 0.0, 140.0, 12.0, 12.0, "Helvetica-Oblique"),
            ],
            bbox: BBox::new(0.0, 0.0, 140.0, 12.0),
        };
        let result = render_line_with_emphasis(&line);
        assert_eq!(result, "normal <strong>bold</strong> <em>italic</em>");
    }

    #[test]
    fn test_render_bold_italic_combined() {
        let line = TextLine {
            words: vec![make_word_from_text(
                "emphasis",
                0.0,
                0.0,
                64.0,
                12.0,
                12.0,
                "Helvetica-BoldOblique",
            )],
            bbox: BBox::new(0.0, 0.0, 64.0, 12.0),
        };
        let result = render_line_with_emphasis(&line);
        assert_eq!(result, "<strong><em>emphasis</em></strong>");
    }

    // --- Options ---

    #[test]
    fn test_html_options_default() {
        let opts = HtmlOptions::default();
        assert_eq!(opts.y_tolerance, 3.0);
        assert_eq!(opts.y_density, 10.0);
        assert_eq!(opts.x_density, 10.0);
        assert_eq!(opts.heading_min_ratio, 1.2);
        assert!(opts.detect_lists);
        assert!(opts.detect_emphasis);
    }

    // --- List marker detection ---

    #[test]
    fn test_detect_bullet_list() {
        let result = detect_list_item("- item text");
        assert_eq!(result, Some((false, "item text".to_string())));
    }

    #[test]
    fn test_detect_numbered_list() {
        let result = detect_list_item("1. first item");
        assert_eq!(result, Some((true, "first item".to_string())));
    }

    #[test]
    fn test_detect_no_list() {
        assert_eq!(detect_list_item("Just normal text"), None);
    }

    // --- Element serialisation ---

    #[test]
    fn test_render_heading_and_paragraph() {
        let elements = vec![
            HtmlElement::Heading {
                level: 1,
                text: "My Title".to_string(),
            },
            HtmlElement::Paragraph("Some body text.".to_string()),
        ];
        let result = render_elements(&elements);
        assert_eq!(result, "<h1>My Title</h1>\n<p>Some body text.</p>");
    }

    #[test]
    fn test_render_unordered_list() {
        let elements = vec![
            HtmlElement::ListItem {
                ordered: false,
                text: "first".to_string(),
            },
            HtmlElement::ListItem {
                ordered: false,
                text: "second".to_string(),
            },
        ];
        let result = render_elements(&elements);
        assert_eq!(result, "<ul>\n<li>first</li>\n<li>second</li>\n</ul>");
    }

    #[test]
    fn test_render_ordered_list() {
        let elements = vec![
            HtmlElement::ListItem {
                ordered: true,
                text: "first".to_string(),
            },
            HtmlElement::ListItem {
                ordered: true,
                text: "second".to_string(),
            },
        ];
        let result = render_elements(&elements);
        assert_eq!(result, "<ol>\n<li>first</li>\n<li>second</li>\n</ol>");
    }

    #[test]
    fn test_render_with_table() {
        let table = Table {
            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
            cells: vec![],
            rows: vec![
                vec![
                    Cell {
                        bbox: BBox::new(0.0, 0.0, 50.0, 25.0),
                        text: Some("Col1".to_string()),
                    },
                    Cell {
                        bbox: BBox::new(50.0, 0.0, 100.0, 25.0),
                        text: Some("Col2".to_string()),
                    },
                ],
                vec![
                    Cell {
                        bbox: BBox::new(0.0, 25.0, 50.0, 50.0),
                        text: Some("A".to_string()),
                    },
                    Cell {
                        bbox: BBox::new(50.0, 25.0, 100.0, 50.0),
                        text: Some("B".to_string()),
                    },
                ],
            ],
            columns: vec![],
        };
        let result = HtmlRenderer::render(&[], &[table], &HtmlOptions::default());
        assert!(result.contains("<table>"));
        assert!(result.contains("<th>Col1</th>"));
        assert!(result.contains("<td>A</td>"));
        assert!(result.contains("</table>"));
    }

    #[test]
    fn test_table_single_row() {
        let table = Table {
            bbox: BBox::new(0.0, 0.0, 100.0, 25.0),
            cells: vec![],
            rows: vec![vec![
                Cell {
                    bbox: BBox::new(0.0, 0.0, 50.0, 25.0),
                    text: Some("Only".to_string()),
                },
                Cell {
                    bbox: BBox::new(50.0, 0.0, 100.0, 25.0),
                    text: Some("Row".to_string()),
                },
            ]],
            columns: vec![],
        };
        let html = table_to_html(&table);
        assert!(html.contains("<th>Only</th>"));
        assert!(html.contains("<th>Row</th>"));
        // The single row is the header, so the body section is present but empty.
        assert!(html.contains("<tbody>"));
    }

    // --- Font size statistics ---

    #[test]
    fn test_median_font_size_empty() {
        assert_eq!(compute_median_font_size(&[]), 12.0);
    }

    #[test]
    fn test_median_font_size_single() {
        let chars = vec![make_char("A", 0.0, 0.0, 10.0, 12.0, 14.0)];
        assert_eq!(compute_median_font_size(&chars), 14.0);
    }

    #[test]
    fn test_block_dominant_size() {
        let block = TextBlock {
            lines: vec![TextLine {
                words: vec![make_word_from_text(
                    "Hello",
                    0.0,
                    0.0,
                    40.0,
                    12.0,
                    14.0,
                    "Helvetica",
                )],
                bbox: BBox::new(0.0, 0.0, 40.0, 12.0),
            }],
            bbox: BBox::new(0.0, 0.0, 40.0, 12.0),
        };
        assert_eq!(block_dominant_size(&block), 14.0);
    }

    // --- Lists and pre-escaped content through the full pipeline ---

    #[test]
    fn test_render_list_items_as_html() {
        let mut chars = Vec::new();
        for (i, c) in "- first item".chars().enumerate() {
            let x0 = i as f64 * 8.0;
            chars.push(make_char(&c.to_string(), x0, 0.0, x0 + 8.0, 12.0, 12.0));
        }
        for (i, c) in "- second item".chars().enumerate() {
            let x0 = i as f64 * 8.0;
            chars.push(make_char(&c.to_string(), x0, 15.0, x0 + 8.0, 27.0, 12.0));
        }
        let result = HtmlRenderer::render_text(&chars, &HtmlOptions::default());
        assert!(
            result.contains("<ul>"),
            "Expected unordered list, got: {result}"
        );
        assert!(
            result.contains("<li>first item</li>"),
            "Expected first list item, got: {result}"
        );
        assert!(
            result.contains("<li>second item</li>"),
            "Expected second list item, got: {result}"
        );
        assert!(
            result.contains("</ul>"),
            "Expected closing ul tag, got: {result}"
        );
    }

    #[test]
    fn test_heading_html_escapes_content() {
        // Element text is stored pre-escaped; serialisation must emit it verbatim.
        let elements = vec![HtmlElement::Heading {
            level: 2,
            text: "A &amp; B".to_string(),
        }];
        let result = render_elements(&elements);
        assert_eq!(result, "<h2>A &amp; B</h2>");
    }

    #[test]
    fn test_paragraph_html_wrapping() {
        let elements = vec![HtmlElement::Paragraph("Hello world".to_string())];
        let result = render_elements(&elements);
        assert_eq!(result, "<p>Hello world</p>");
    }
}