1use super::text_region::TextRegion;
7use crate::processors::BoundingBox;
8use image::RgbImage;
9use once_cell::sync::Lazy;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::path::Path;
13use std::sync::Arc;
14
15static TITLE_NUMBERING_REGEX: Lazy<Regex> = Lazy::new(|| {
18 Regex::new(
19 r"(?x)
20 ^\s*
21 (
22 # Arabic numerals: 1, 1.2, 1.2.3, etc.
23 [1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?
24 |
25 # Parenthesized Arabic numerals: (1), (1.2), etc.
26 [((][1-9][0-9]*(?:\.[1-9][0-9]*)*[))]
27 |
28 # Chinese numerals with punctuation: 一、 二、
29 [一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾][、.]?
30 |
31 # Parenthesized Chinese numerals: (一)
32 [((][一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+[))]
33 |
34 # Roman numerals with delimiter (period or followed by space)
35 (?:I|II|III|IV|V|VI|VII|VIII|IX|X)(?:\.|\b)
36 )
37 (\s+)
38 (.*)
39 $
40 ",
41 )
42 .expect("Invalid title numbering regex")
43});
44
45fn format_title_with_level(title: &str) -> (usize, String) {
60 let cleaned = title.replace("-\n", "").replace('\n', " ");
62
63 if let Some(captures) = TITLE_NUMBERING_REGEX.captures(&cleaned) {
64 let numbering = captures.get(1).map(|m| m.as_str().trim()).unwrap_or("");
65 let title_content = captures.get(3).map(|m| m.as_str()).unwrap_or("");
66
67 let level = if numbering.contains('.') {
70 numbering.matches('.').count() + 1
71 } else {
72 1
73 };
74
75 let formatted = if title_content.is_empty() {
77 numbering.trim_end_matches('.').to_string()
78 } else {
79 format!(
80 "{} {}",
81 numbering.trim_end_matches('.'),
82 title_content.trim_start()
83 )
84 };
85
86 let level = level.clamp(1, 6);
88
89 (level, formatted)
90 } else {
91 (2, cleaned)
93 }
94}
95
96#[derive(Debug, Clone, Serialize, Deserialize)]
108pub struct RegionBlock {
109 pub bbox: BoundingBox,
111 pub confidence: f32,
113 pub order_index: Option<u32>,
115 pub element_indices: Vec<usize>,
117}
118
119#[derive(Debug, Clone, Serialize, Deserialize)]
141pub struct StructureResult {
142 pub input_path: Arc<str>,
144 pub index: usize,
146 pub layout_elements: Vec<LayoutElement>,
148 pub tables: Vec<TableResult>,
150 pub formulas: Vec<FormulaResult>,
152 pub text_regions: Option<Vec<TextRegion>>,
154 pub orientation_angle: Option<f32>,
156 pub region_blocks: Option<Vec<RegionBlock>>,
159 #[serde(skip)]
164 pub rectified_img: Option<Arc<RgbImage>>,
165}
166
167impl StructureResult {
168 pub fn new(input_path: impl Into<Arc<str>>, index: usize) -> Self {
170 Self {
171 input_path: input_path.into(),
172 index,
173 layout_elements: Vec::new(),
174 tables: Vec::new(),
175 formulas: Vec::new(),
176 text_regions: None,
177 orientation_angle: None,
178 region_blocks: None,
179 rectified_img: None,
180 }
181 }
182
183 pub fn with_layout_elements(mut self, elements: Vec<LayoutElement>) -> Self {
185 self.layout_elements = elements;
186 self
187 }
188
189 pub fn with_tables(mut self, tables: Vec<TableResult>) -> Self {
191 self.tables = tables;
192 self
193 }
194
195 pub fn with_formulas(mut self, formulas: Vec<FormulaResult>) -> Self {
197 self.formulas = formulas;
198 self
199 }
200
201 pub fn with_text_regions(mut self, regions: Vec<TextRegion>) -> Self {
203 self.text_regions = Some(regions);
204 self
205 }
206
207 pub fn with_region_blocks(mut self, blocks: Vec<RegionBlock>) -> Self {
212 self.region_blocks = Some(blocks);
213 self
214 }
215
216 pub fn to_markdown(&self) -> String {
228 let table_bboxes: Vec<&BoundingBox> = self
230 .layout_elements
231 .iter()
232 .filter(|e| e.element_type == LayoutElementType::Table)
233 .map(|e| &e.bbox)
234 .collect();
235
236 let mut md = String::new();
237 for element in &self.layout_elements {
238 if matches!(
240 element.element_type,
241 LayoutElementType::Number
242 | LayoutElementType::Footnote
243 | LayoutElementType::Header
244 | LayoutElementType::HeaderImage
245 | LayoutElementType::Footer
246 | LayoutElementType::FooterImage
247 | LayoutElementType::AsideText
248 ) {
249 continue;
250 }
251
252 if element.element_type == LayoutElementType::Text {
256 let overlaps_table = table_bboxes.iter().any(|table_bbox| {
257 element.bbox.ioa(table_bbox) > 0.3 });
259
260 if overlaps_table && element.confidence < 0.7 {
263 continue;
264 }
265 }
266
267 match element.element_type {
268 LayoutElementType::DocTitle => {
270 md.push_str("\n# ");
271 if let Some(text) = &element.text {
272 md.push_str(&text.replace("-\n", "").replace('\n', " "));
273 }
274 md.push_str("\n\n");
275 }
276 LayoutElementType::ParagraphTitle => {
278 if let Some(text) = &element.text {
279 let (level, formatted_title) = format_title_with_level(text);
280 md.push('\n');
281 for _ in 0..level {
282 md.push('#');
283 }
284 md.push(' ');
285 md.push_str(&formatted_title);
286 md.push_str("\n\n");
287 } else {
288 md.push_str("\n## \n\n");
289 }
290 }
291 LayoutElementType::Table => {
293 if let Some(table) =
294 self.tables.iter().find(|t| t.bbox.iou(&element.bbox) > 0.5)
295 {
296 if let Some(html) = &table.html_structure {
297 let table_with_border = html.replace("<table>", "<table border=\"1\">");
299 md.push('\n');
300 md.push_str(&table_with_border);
301 md.push_str("\n\n");
302 } else {
303 md.push_str("\n[Table]\n\n");
304 }
305 } else {
306 md.push_str("\n[Table]\n\n");
307 }
308 }
309 LayoutElementType::Formula | LayoutElementType::FormulaNumber => {
311 md.push_str("\n$$");
312 if let Some(latex) = &element.text {
313 md.push_str(latex);
314 }
315 md.push_str("$$\n\n");
316 }
317 LayoutElementType::Image | LayoutElementType::Chart => {
319 md.push_str("\n![Figure]");
320 if let Some(caption) = &element.text {
321 md.push('(');
322 md.push_str(caption);
323 md.push(')');
324 }
325 md.push_str("\n\n");
326 }
327 LayoutElementType::Seal => {
329 md.push_str("\n![Seal]");
330 if let Some(text) = &element.text {
331 md.push_str("\n> ");
332 md.push_str(text);
333 }
334 md.push_str("\n\n");
335 }
336 _ if element.element_type.is_caption() => {
338 if let Some(text) = &element.text {
339 md.push('*');
340 md.push_str(text);
341 md.push_str("*\n\n");
342 }
343 }
344 LayoutElementType::Abstract => {
346 md.push_str("\n**Abstract**\n\n");
347 if let Some(text) = &element.text {
348 md.push_str(text);
349 md.push_str("\n\n");
350 }
351 }
352 LayoutElementType::Reference => {
354 md.push_str("\n**References**\n\n");
355 if let Some(text) = &element.text {
356 md.push_str(text);
357 md.push_str("\n\n");
358 }
359 }
360 LayoutElementType::List => {
362 if let Some(text) = &element.text {
363 for line in text.lines() {
365 md.push_str("- ");
366 md.push_str(line);
367 md.push('\n');
368 }
369 md.push('\n');
370 }
371 }
372 _ if element.element_type.is_header() || element.element_type.is_footer() => {
374 if let Some(text) = &element.text {
375 md.push_str("<small>");
376 md.push_str(text);
377 md.push_str("</small>\n\n");
378 }
379 }
380 _ => {
382 if let Some(text) = &element.text {
383 let formatted = text.replace("\n\n", "\n").replace('\n', "\n\n");
385 md.push_str(&formatted);
386 md.push_str("\n\n");
387 }
388 }
389 }
390 }
391 md.trim().to_string()
392 }
393
394 pub fn to_html(&self) -> String {
398 let mut html = String::from(
399 "<!DOCTYPE html>\n<html>\n<head>\n<meta charset=\"UTF-8\">\n</head>\n<body>\n",
400 );
401
402 for element in &self.layout_elements {
403 match element.element_type {
404 LayoutElementType::DocTitle => {
406 html.push_str("<h1>");
407 if let Some(text) = &element.text {
408 html.push_str(&Self::escape_html(text));
409 }
410 html.push_str("</h1>\n");
411 }
412 LayoutElementType::ParagraphTitle => {
414 html.push_str("<h2>");
415 if let Some(text) = &element.text {
416 html.push_str(&Self::escape_html(text));
417 }
418 html.push_str("</h2>\n");
419 }
420 LayoutElementType::Table => {
422 if let Some(table) =
423 self.tables.iter().find(|t| t.bbox.iou(&element.bbox) > 0.5)
424 {
425 if let Some(table_html) = &table.html_structure {
426 let styled = table_html.replace(
428 "<table>",
429 "<table border=\"1\" style=\"border-collapse: collapse;\">",
430 );
431 html.push_str(&styled);
432 html.push('\n');
433 } else {
434 html.push_str("<p>[Table]</p>\n");
435 }
436 } else {
437 html.push_str("<p>[Table]</p>\n");
438 }
439 }
440 LayoutElementType::Formula | LayoutElementType::FormulaNumber => {
442 html.push_str("<p class=\"formula\">$$");
443 if let Some(latex) = &element.text {
444 html.push_str(&Self::escape_html(latex));
445 }
446 html.push_str("$$</p>\n");
447 }
448 LayoutElementType::Image | LayoutElementType::Chart => {
450 html.push_str("<figure>\n<img alt=\"Figure\" />\n");
451 if let Some(caption) = &element.text {
452 html.push_str("<figcaption>");
453 html.push_str(&Self::escape_html(caption));
454 html.push_str("</figcaption>\n");
455 }
456 html.push_str("</figure>\n");
457 }
458 LayoutElementType::Seal => {
460 html.push_str("<figure class=\"seal\">\n<img alt=\"Seal\" />\n");
461 if let Some(text) = &element.text {
462 html.push_str("<figcaption>");
463 html.push_str(&Self::escape_html(text));
464 html.push_str("</figcaption>\n");
465 }
466 html.push_str("</figure>\n");
467 }
468 _ if element.element_type.is_caption() => {
470 if let Some(text) = &element.text {
471 html.push_str("<figcaption>");
472 html.push_str(&Self::escape_html(text));
473 html.push_str("</figcaption>\n");
474 }
475 }
476 LayoutElementType::Abstract => {
478 html.push_str("<section class=\"abstract\">\n<h3>Abstract</h3>\n<p>");
479 if let Some(text) = &element.text {
480 html.push_str(&Self::escape_html(text));
481 }
482 html.push_str("</p>\n</section>\n");
483 }
484 LayoutElementType::Reference | LayoutElementType::ReferenceContent => {
486 html.push_str("<section class=\"references\">\n<p>");
487 if let Some(text) = &element.text {
488 html.push_str(&Self::escape_html(text));
489 }
490 html.push_str("</p>\n</section>\n");
491 }
492 LayoutElementType::List => {
494 html.push_str("<ul>\n");
495 if let Some(text) = &element.text {
496 for line in text.lines() {
497 html.push_str("<li>");
498 html.push_str(&Self::escape_html(line));
499 html.push_str("</li>\n");
500 }
501 }
502 html.push_str("</ul>\n");
503 }
504 _ if element.element_type.is_header() => {
506 html.push_str("<header>");
507 if let Some(text) = &element.text {
508 html.push_str(&Self::escape_html(text));
509 }
510 html.push_str("</header>\n");
511 }
512 _ if element.element_type.is_footer() => {
514 html.push_str("<footer>");
515 if let Some(text) = &element.text {
516 html.push_str(&Self::escape_html(text));
517 }
518 html.push_str("</footer>\n");
519 }
520 _ => {
522 if let Some(text) = &element.text {
523 html.push_str("<p>");
524 html.push_str(&Self::escape_html(text));
525 html.push_str("</p>\n");
526 }
527 }
528 }
529 }
530 html.push_str("</body>\n</html>");
531 html
532 }
533
/// Escapes the characters that are significant in HTML text and attribute
/// values.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#39;` respectively; every other character is copied
/// unchanged. The input is scanned once, so an `&` that is already part of an
/// entity is escaped again rather than recognised.
fn escape_html(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
542
543 pub fn to_json_value(&self) -> serde_json::Result<serde_json::Value> {
545 serde_json::to_value(self)
546 }
547
548 pub fn save_results(
559 &self,
560 output_dir: impl AsRef<Path>,
561 to_json: bool,
562 to_markdown: bool,
563 to_html: bool,
564 ) -> std::io::Result<()> {
565 let output_dir = output_dir.as_ref();
566 if !output_dir.exists() {
567 std::fs::create_dir_all(output_dir)?;
568 }
569
570 let input_path = Path::new(self.input_path.as_ref());
571 let stem = input_path
572 .file_stem()
573 .and_then(|s| s.to_str())
574 .unwrap_or("result");
575
576 if to_json {
578 let json_path = output_dir.join(format!("{}.json", stem));
579 let json_file = std::fs::File::create(json_path)?;
580 serde_json::to_writer_pretty(json_file, self)?;
581 }
582
583 if to_markdown {
585 let md_path = output_dir.join(format!("{}.md", stem));
586 std::fs::write(md_path, self.to_markdown())?;
587 }
588
589 if to_html {
591 let html_path = output_dir.join(format!("{}.html", stem));
592 std::fs::write(html_path, self.to_html())?;
593 }
594
595 Ok(())
596 }
597}
598
599#[derive(Debug, Clone, Serialize, Deserialize)]
601pub struct LayoutElement {
602 pub bbox: BoundingBox,
604 pub element_type: LayoutElementType,
606 pub confidence: f32,
608 pub label: Option<String>,
610 pub text: Option<String>,
612 pub order_index: Option<u32>,
619}
620
621impl LayoutElement {
622 pub fn new(bbox: BoundingBox, element_type: LayoutElementType, confidence: f32) -> Self {
624 Self {
625 bbox,
626 element_type,
627 confidence,
628 label: None,
629 text: None,
630 order_index: None,
631 }
632 }
633
634 pub fn with_label(mut self, label: impl Into<String>) -> Self {
636 self.label = Some(label.into());
637 self
638 }
639
640 pub fn with_text(mut self, text: impl Into<String>) -> Self {
642 self.text = Some(text.into());
643 self
644 }
645}
646
647#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
694pub enum LayoutElementType {
695 DocTitle,
697 ParagraphTitle,
699 Text,
701 Content,
703 Abstract,
705
706 Image,
708 Table,
710 Chart,
712 Formula,
714
715 FigureTitle,
717 TableTitle,
719 ChartTitle,
721 FigureTableChartTitle,
723
724 Header,
726 HeaderImage,
728 Footer,
730 FooterImage,
732 Footnote,
734
735 Seal,
737 Number,
739 Reference,
741 ReferenceContent,
743 Algorithm,
745 FormulaNumber,
747 AsideText,
749 List,
751
752 Region,
755
756 Other,
758}
759
760impl LayoutElementType {
761 pub fn as_str(&self) -> &'static str {
765 match self {
766 LayoutElementType::DocTitle => "doc_title",
768 LayoutElementType::ParagraphTitle => "paragraph_title",
769 LayoutElementType::Text => "text",
770 LayoutElementType::Content => "content",
771 LayoutElementType::Abstract => "abstract",
772
773 LayoutElementType::Image => "image",
775 LayoutElementType::Table => "table",
776 LayoutElementType::Chart => "chart",
777 LayoutElementType::Formula => "formula",
778
779 LayoutElementType::FigureTitle => "figure_title",
781 LayoutElementType::TableTitle => "table_title",
782 LayoutElementType::ChartTitle => "chart_title",
783 LayoutElementType::FigureTableChartTitle => "figure_table_chart_title",
784
785 LayoutElementType::Header => "header",
787 LayoutElementType::HeaderImage => "header_image",
788 LayoutElementType::Footer => "footer",
789 LayoutElementType::FooterImage => "footer_image",
790 LayoutElementType::Footnote => "footnote",
791
792 LayoutElementType::Seal => "seal",
794 LayoutElementType::Number => "number",
795 LayoutElementType::Reference => "reference",
796 LayoutElementType::ReferenceContent => "reference_content",
797 LayoutElementType::Algorithm => "algorithm",
798 LayoutElementType::FormulaNumber => "formula_number",
799 LayoutElementType::AsideText => "aside_text",
800 LayoutElementType::List => "list",
801
802 LayoutElementType::Region => "region",
804
805 LayoutElementType::Other => "other",
807 }
808 }
809
810 pub fn from_label(label: &str) -> Self {
815 match label.to_lowercase().as_str() {
816 "doc_title" => LayoutElementType::DocTitle,
818 "paragraph_title" | "title" => LayoutElementType::ParagraphTitle,
819 "text" | "paragraph" => LayoutElementType::Text,
820 "content" => LayoutElementType::Content,
821 "abstract" => LayoutElementType::Abstract,
822
823 "image" | "figure" => LayoutElementType::Image,
825 "table" => LayoutElementType::Table,
826 "chart" | "flowchart" => LayoutElementType::Chart,
827 "formula" | "equation" | "display_formula" | "inline_formula" => {
828 LayoutElementType::Formula
829 }
830
831 "figure_title" => LayoutElementType::FigureTitle,
833 "table_title" => LayoutElementType::TableTitle,
834 "chart_title" => LayoutElementType::ChartTitle,
835 "figure_table_chart_title" | "caption" => LayoutElementType::FigureTableChartTitle,
836
837 "header" => LayoutElementType::Header,
839 "header_image" => LayoutElementType::HeaderImage,
840 "footer" => LayoutElementType::Footer,
841 "footer_image" => LayoutElementType::FooterImage,
842 "footnote" | "vision_footnote" => LayoutElementType::Footnote,
843
844 "seal" => LayoutElementType::Seal,
846 "number" => LayoutElementType::Number,
847 "reference" => LayoutElementType::Reference,
848 "reference_content" => LayoutElementType::ReferenceContent,
849 "algorithm" => LayoutElementType::Algorithm,
850 "formula_number" => LayoutElementType::FormulaNumber,
851 "aside_text" => LayoutElementType::AsideText,
852 "list" => LayoutElementType::List,
853 "vertical_text" => LayoutElementType::Text,
854
855 "region" => LayoutElementType::Region,
857
858 _ => LayoutElementType::Other,
861 }
862 }
863
864 pub fn semantic_category(&self) -> &'static str {
883 match self {
884 LayoutElementType::DocTitle | LayoutElementType::ParagraphTitle => "title",
886
887 LayoutElementType::Text | LayoutElementType::Content | LayoutElementType::Abstract => {
889 "text"
890 }
891
892 LayoutElementType::Image | LayoutElementType::Chart => "visual",
894
895 LayoutElementType::Table => "table",
897
898 LayoutElementType::FigureTitle
900 | LayoutElementType::TableTitle
901 | LayoutElementType::ChartTitle
902 | LayoutElementType::FigureTableChartTitle => "caption",
903
904 LayoutElementType::Header | LayoutElementType::HeaderImage => "header",
906
907 LayoutElementType::Footer
909 | LayoutElementType::FooterImage
910 | LayoutElementType::Footnote => "footer",
911
912 LayoutElementType::Formula | LayoutElementType::FormulaNumber => "formula",
914
915 LayoutElementType::Seal
917 | LayoutElementType::Number
918 | LayoutElementType::Reference
919 | LayoutElementType::ReferenceContent
920 | LayoutElementType::Algorithm
921 | LayoutElementType::AsideText => "special",
922
923 LayoutElementType::List => "list",
925
926 LayoutElementType::Region => "region",
928
929 LayoutElementType::Other => "other",
931 }
932 }
933
934 pub fn is_title(&self) -> bool {
936 matches!(
937 self,
938 LayoutElementType::DocTitle | LayoutElementType::ParagraphTitle
939 )
940 }
941
942 pub fn is_visual(&self) -> bool {
944 matches!(self, LayoutElementType::Image | LayoutElementType::Chart)
945 }
946
947 pub fn is_caption(&self) -> bool {
949 matches!(
950 self,
951 LayoutElementType::FigureTitle
952 | LayoutElementType::TableTitle
953 | LayoutElementType::ChartTitle
954 | LayoutElementType::FigureTableChartTitle
955 )
956 }
957
958 pub fn is_header(&self) -> bool {
960 matches!(
961 self,
962 LayoutElementType::Header | LayoutElementType::HeaderImage
963 )
964 }
965
966 pub fn is_footer(&self) -> bool {
968 matches!(
969 self,
970 LayoutElementType::Footer
971 | LayoutElementType::FooterImage
972 | LayoutElementType::Footnote
973 )
974 }
975
976 pub fn is_formula(&self) -> bool {
978 matches!(
979 self,
980 LayoutElementType::Formula | LayoutElementType::FormulaNumber
981 )
982 }
983
984 pub fn should_ocr(&self) -> bool {
986 matches!(
987 self,
988 LayoutElementType::Text
989 | LayoutElementType::Content
990 | LayoutElementType::Abstract
991 | LayoutElementType::DocTitle
992 | LayoutElementType::ParagraphTitle
993 | LayoutElementType::FigureTitle
994 | LayoutElementType::TableTitle
995 | LayoutElementType::ChartTitle
996 | LayoutElementType::FigureTableChartTitle
997 | LayoutElementType::Header
998 | LayoutElementType::HeaderImage
999 | LayoutElementType::Footer
1000 | LayoutElementType::FooterImage
1001 | LayoutElementType::Footnote
1002 | LayoutElementType::Reference
1003 | LayoutElementType::ReferenceContent
1004 | LayoutElementType::Algorithm
1005 | LayoutElementType::AsideText
1006 | LayoutElementType::List
1007 | LayoutElementType::Number
1008 )
1009 }
1010}
1011
1012pub fn remove_overlapping_layout_elements(
1017 layout_elements: &mut Vec<LayoutElement>,
1018 overlap_threshold: f32,
1019) -> usize {
1020 use std::collections::HashSet;
1021
1022 if layout_elements.len() <= 1 {
1023 return 0;
1024 }
1025
1026 let bboxes: Vec<_> = layout_elements.iter().map(|e| e.bbox.clone()).collect();
1027 let labels: Vec<&str> = layout_elements
1028 .iter()
1029 .map(|e| e.element_type.as_str())
1030 .collect();
1031
1032 let remove_indices =
1033 crate::processors::get_overlap_removal_indices(&bboxes, &labels, overlap_threshold);
1034 if remove_indices.is_empty() {
1035 return 0;
1036 }
1037
1038 let remove_set: HashSet<usize> = remove_indices.into_iter().collect();
1039 let before = layout_elements.len();
1040
1041 let mut idx = 0;
1042 layout_elements.retain(|_| {
1043 let keep = !remove_set.contains(&idx);
1044 idx += 1;
1045 keep
1046 });
1047
1048 before.saturating_sub(layout_elements.len())
1049}
1050
1051pub fn apply_standardized_layout_label_fixes(layout_elements: &mut [LayoutElement]) {
1055 if layout_elements.is_empty() {
1056 return;
1057 }
1058
1059 let mut footnote_indices: Vec<usize> = Vec::new();
1060 let mut paragraph_title_indices: Vec<usize> = Vec::new();
1061 let mut bottom_text_y_max: f32 = 0.0;
1062 let mut max_block_area: f32 = 0.0;
1063 let mut doc_title_num: usize = 0;
1064
1065 for (idx, elem) in layout_elements.iter().enumerate() {
1066 let area =
1067 (elem.bbox.x_max() - elem.bbox.x_min()) * (elem.bbox.y_max() - elem.bbox.y_min());
1068 max_block_area = max_block_area.max(area);
1069
1070 match elem.element_type {
1071 LayoutElementType::Footnote => footnote_indices.push(idx),
1072 LayoutElementType::ParagraphTitle => paragraph_title_indices.push(idx),
1073 LayoutElementType::Text => {
1074 bottom_text_y_max = bottom_text_y_max.max(elem.bbox.y_max());
1075 }
1076 LayoutElementType::DocTitle => doc_title_num += 1,
1077 _ => {}
1078 }
1079 }
1080
1081 for idx in footnote_indices {
1082 if layout_elements[idx].bbox.y_max() < bottom_text_y_max {
1083 layout_elements[idx].element_type = LayoutElementType::Text;
1084 layout_elements[idx].label = Some("text".to_string());
1085 }
1086 }
1087
1088 let only_one_paragraph_title = paragraph_title_indices.len() == 1 && doc_title_num == 0;
1089 if only_one_paragraph_title {
1090 let idx = paragraph_title_indices[0];
1091 let area = (layout_elements[idx].bbox.x_max() - layout_elements[idx].bbox.x_min())
1092 * (layout_elements[idx].bbox.y_max() - layout_elements[idx].bbox.y_min());
1093
1094 let title_area_ratio_threshold = 0.3f32;
1095 if area > max_block_area * title_area_ratio_threshold {
1096 layout_elements[idx].element_type = LayoutElementType::DocTitle;
1097 layout_elements[idx].label = Some("doc_title".to_string());
1098 }
1099 }
1100}
1101
1102#[derive(Debug, Clone, Serialize, Deserialize)]
1104pub struct TableResult {
1105 pub bbox: BoundingBox,
1107 pub table_type: TableType,
1109 pub classification_confidence: Option<f32>,
1111 pub structure_confidence: Option<f32>,
1113 pub cells: Vec<TableCell>,
1115 pub html_structure: Option<String>,
1117 pub cell_texts: Option<Vec<Option<String>>>,
1119 #[serde(skip)]
1121 pub structure_tokens: Option<Vec<String>>,
1122}
1123
1124impl TableResult {
1125 pub fn new(bbox: BoundingBox, table_type: TableType) -> Self {
1127 Self {
1128 bbox,
1129 table_type,
1130 classification_confidence: None,
1131 structure_confidence: None,
1132 cells: Vec::new(),
1133 html_structure: None,
1134 cell_texts: None,
1135 structure_tokens: None,
1136 }
1137 }
1138
1139 pub fn with_classification_confidence(mut self, confidence: f32) -> Self {
1141 self.classification_confidence = Some(confidence);
1142 self
1143 }
1144
1145 pub fn with_structure_confidence(mut self, confidence: f32) -> Self {
1147 self.structure_confidence = Some(confidence);
1148 self
1149 }
1150
1151 pub fn with_cells(mut self, cells: Vec<TableCell>) -> Self {
1153 self.cells = cells;
1154 self
1155 }
1156
1157 pub fn with_html_structure(mut self, html: impl Into<String>) -> Self {
1159 self.html_structure = Some(html.into());
1160 self
1161 }
1162
1163 pub fn with_cell_texts(mut self, texts: Vec<Option<String>>) -> Self {
1165 self.cell_texts = Some(texts);
1166 self
1167 }
1168
1169 pub fn with_structure_tokens(mut self, tokens: Vec<String>) -> Self {
1171 self.structure_tokens = Some(tokens);
1172 self
1173 }
1174
1175 pub fn confidence(&self) -> Option<f32> {
1187 match (self.classification_confidence, self.structure_confidence) {
1188 (Some(cls), Some(str)) => Some(cls.min(str)),
1189 (None, Some(str)) => Some(str),
1190 (Some(cls), None) => Some(cls),
1191 (None, None) => None,
1192 }
1193 }
1194
1195 pub fn has_structure(&self) -> bool {
1200 !self.cells.is_empty() || self.html_structure.is_some()
1201 }
1202}
1203
1204#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1206pub enum TableType {
1207 Wired,
1209 Wireless,
1211 Unknown,
1213}
1214
1215#[derive(Debug, Clone, Serialize, Deserialize)]
1217pub struct TableCell {
1218 pub bbox: BoundingBox,
1220 pub row: Option<usize>,
1222 pub col: Option<usize>,
1224 pub row_span: Option<usize>,
1226 pub col_span: Option<usize>,
1228 pub confidence: f32,
1230 pub text: Option<String>,
1232}
1233
1234impl TableCell {
1235 pub fn new(bbox: BoundingBox, confidence: f32) -> Self {
1237 Self {
1238 bbox,
1239 row: None,
1240 col: None,
1241 row_span: None,
1242 col_span: None,
1243 confidence,
1244 text: None,
1245 }
1246 }
1247
1248 pub fn with_position(mut self, row: usize, col: usize) -> Self {
1250 self.row = Some(row);
1251 self.col = Some(col);
1252 self
1253 }
1254
1255 pub fn with_span(mut self, row_span: usize, col_span: usize) -> Self {
1257 self.row_span = Some(row_span);
1258 self.col_span = Some(col_span);
1259 self
1260 }
1261
1262 pub fn with_text(mut self, text: impl Into<String>) -> Self {
1264 self.text = Some(text.into());
1265 self
1266 }
1267}
1268
1269#[derive(Debug, Clone, Serialize, Deserialize)]
1271pub struct FormulaResult {
1272 pub bbox: BoundingBox,
1274 pub latex: String,
1276 pub confidence: f32,
1278}
1279
1280impl FormulaResult {
1281 pub fn new(bbox: BoundingBox, latex: impl Into<String>, confidence: f32) -> Self {
1283 Self {
1284 bbox,
1285 latex: latex.into(),
1286 confidence,
1287 }
1288 }
1289}
1290
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed result keeps its path and index and is otherwise empty.
    #[test]
    fn test_structure_result_creation() {
        let fresh = StructureResult::new("test.jpg", 0);

        assert_eq!(fresh.input_path.as_ref(), "test.jpg");
        assert_eq!(fresh.index, 0);
        assert!(fresh.layout_elements.is_empty());
        assert!(fresh.tables.is_empty());
        assert!(fresh.formulas.is_empty());
        assert!(fresh.text_regions.is_none());
    }

    /// Canonical labels of a few representative element types.
    #[test]
    fn test_layout_element_type_as_str() {
        let expectations = [
            (LayoutElementType::Text, "text"),
            (LayoutElementType::Table, "table"),
            (LayoutElementType::Formula, "formula"),
        ];
        for (kind, label) in expectations {
            assert_eq!(kind.as_str(), label);
        }
    }

    /// A new table result keeps its type and has no structure yet.
    #[test]
    fn test_table_result_creation() {
        let table = TableResult::new(
            BoundingBox::from_coords(0.0, 0.0, 100.0, 100.0),
            TableType::Wired,
        );

        assert_eq!(table.table_type, TableType::Wired);
        assert!(table.cells.is_empty());
        assert!(table.html_structure.is_none());
    }

    /// Markdown and HTML exports contain the title and the body text.
    #[test]
    fn test_structure_result_export() {
        let bbox = BoundingBox::from_coords(0.0, 0.0, 100.0, 100.0);

        let elements = vec![
            LayoutElement::new(bbox.clone(), LayoutElementType::DocTitle, 1.0)
                .with_text("Test Document"),
            LayoutElement::new(bbox.clone(), LayoutElementType::Text, 1.0)
                .with_text("Hello world"),
        ];
        let result = StructureResult::new("test.jpg", 0).with_layout_elements(elements);

        let md = result.to_markdown();
        assert!(md.contains("# Test Document"));
        assert!(md.contains("Hello world"));

        let html = result.to_html();
        assert!(html.contains("<h1>Test Document</h1>"));
        assert!(html.contains("<p>Hello world</p>"));
    }
}