1use serde::{Deserialize, Serialize};
4
5use super::bbox::BoundingBox;
6use super::chunks::TextChunk;
7use super::content::ContentElement;
8use super::enums::SemanticType;
9
/// Tolerance constant for table-border handling, fixed at `0.5`.
///
/// NOTE(review): no use site is visible in this section; presumably a slack applied
/// when comparing border coordinates — confirm the unit and usage against callers.
pub const TABLE_BORDER_EPSILON: f64 = 0.5;

/// Threshold constant for cell/content intersection, fixed at `0.01`.
///
/// NOTE(review): by its name this is presumably the minimum share of overlap required
/// for content to be assigned to a cell; whether `0.01` is meant as a fraction (1%) or
/// a literal percent value is not visible here — confirm against callers.
pub const MIN_CELL_CONTENT_INTERSECTION_PERCENT: f64 = 0.01;
15
/// A table described by its border grid: a bounding box, coordinate/width vectors for
/// the grid, its rows, and optional links to neighbouring tables.
///
/// Derives `Debug`, `Clone`, and serde's `Serialize`/`Deserialize`. Because the
/// neighbour links are owned `Box`es, cloning a `TableBorder` also clones any linked
/// tables.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableBorder {
    /// Bounding box associated with the table.
    pub bbox: BoundingBox,
    /// Optional numeric index of this element; `None` when not assigned.
    pub index: Option<u32>,
    /// Optional level label.
    /// NOTE(review): presumably a structural/nesting level — confirm against producers.
    pub level: Option<String>,
    /// X coordinate values of the grid.
    /// NOTE(review): presumably one entry per vertical border line (the in-file test
    /// uses three entries for a two-column table) — confirm.
    pub x_coordinates: Vec<f64>,
    /// Width values for the x direction.
    /// NOTE(review): presumably parallel to `x_coordinates` (one width per line) — confirm.
    pub x_widths: Vec<f64>,
    /// Y coordinate values of the grid.
    /// NOTE(review): presumably one entry per horizontal border line — confirm.
    pub y_coordinates: Vec<f64>,
    /// Width values for the y direction.
    /// NOTE(review): presumably parallel to `y_coordinates` — confirm.
    pub y_widths: Vec<f64>,
    /// Rows of the table.
    pub rows: Vec<TableBorderRow>,
    /// Number of rows. Stored independently of `rows`; nothing in this type keeps it
    /// equal to `rows.len()`.
    pub num_rows: usize,
    /// Number of columns. Stored independently of the row/cell data.
    pub num_columns: usize,
    /// Flag marking the table as "bad".
    /// NOTE(review): the criteria are not visible here — confirm against the code that sets it.
    pub is_bad_table: bool,
    /// Flag relating the table to a "table transformer".
    /// NOTE(review): presumably set when the table was detected by a table-transformer
    /// model rather than from drawn borders — confirm.
    pub is_table_transformer: bool,
    /// Optional owned link to a preceding table.
    /// NOTE(review): presumably used for tables that continue across pages — confirm.
    pub previous_table: Option<Box<TableBorder>>,
    /// Optional owned link to a following table.
    /// NOTE(review): presumably used for tables that continue across pages — confirm.
    pub next_table: Option<Box<TableBorder>>,
}
48
/// A single row of a [`TableBorder`], holding its cells.
///
/// Derives `Debug`, `Clone`, and serde's `Serialize`/`Deserialize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableBorderRow {
    /// Bounding box associated with the row.
    pub bbox: BoundingBox,
    /// Optional numeric index of this element; `None` when not assigned.
    pub index: Option<u32>,
    /// Optional level label.
    /// NOTE(review): presumably a structural/nesting level — confirm against producers.
    pub level: Option<String>,
    /// Row number of this row.
    /// NOTE(review): presumably zero-based within the owning table — confirm.
    pub row_number: usize,
    /// Cells belonging to this row.
    pub cells: Vec<TableBorderCell>,
    /// Optional semantic classification of the row; `None` when not classified.
    pub semantic_type: Option<SemanticType>,
}
65
/// A single cell of a table row, with its grid position, spans, and content.
///
/// Derives `Debug`, `Clone`, and serde's `Serialize`/`Deserialize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableBorderCell {
    /// Bounding box associated with the cell.
    pub bbox: BoundingBox,
    /// Optional numeric index of this element; `None` when not assigned.
    pub index: Option<u32>,
    /// Optional level label.
    /// NOTE(review): presumably a structural/nesting level — confirm against producers.
    pub level: Option<String>,
    /// Row number of the cell.
    /// NOTE(review): presumably the zero-based row of the cell's top-left corner — confirm.
    pub row_number: usize,
    /// Column number of the cell.
    /// NOTE(review): presumably the zero-based column of the cell's top-left corner — confirm.
    pub col_number: usize,
    /// Row span value of the cell.
    /// NOTE(review): presumably the number of grid rows the cell covers — confirm.
    pub row_span: usize,
    /// Column span value of the cell.
    /// NOTE(review): presumably the number of grid columns the cell covers — confirm.
    pub col_span: usize,
    /// Tokens held by the cell (see [`TableToken`]).
    pub content: Vec<TableToken>,
    /// Content elements held by the cell.
    /// NOTE(review): distinct from `content` despite the similar name; how the two
    /// lists relate is not visible here — confirm before relying on either.
    pub contents: Vec<ContentElement>,
    /// Optional semantic classification of the cell; `None` when not classified.
    pub semantic_type: Option<SemanticType>,
}
90
/// A [`TextChunk`] tagged with the kind of table content it represents.
///
/// Derives `Debug`, `Clone`, and serde's `Serialize`/`Deserialize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableToken {
    /// Underlying text chunk carried by the token.
    pub base: TextChunk,
    /// Kind of content this token stands for.
    pub token_type: TableTokenType,
}
99
/// Kind tag for a [`TableToken`].
///
/// A fieldless, `Copy` enum that supports equality comparison and serde
/// (de)serialization.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TableTokenType {
    /// The token is text.
    Text,
    /// The token is an image.
    Image,
    /// The token is a table.
    /// NOTE(review): presumably a table nested inside a cell — confirm.
    Table,
}
110
/// Alias for a vector of [`TableToken`]s.
/// NOTE(review): by its name, presumably the tokens making up one row — confirm.
pub type TableTokenRow = Vec<TableToken>;
113
114#[derive(Debug, Clone, Default)]
116pub struct TableBordersCollection {
117 pub table_borders: Vec<Vec<TableBorder>>,
119}
120
121impl TableBordersCollection {
122 pub fn new(num_pages: usize) -> Self {
124 Self {
125 table_borders: vec![Vec::new(); num_pages],
126 }
127 }
128
129 pub fn add(&mut self, page: usize, border: TableBorder) {
131 if page < self.table_borders.len() {
132 self.table_borders[page].push(border);
133 }
134 }
135
136 pub fn get_page(&self, page: usize) -> &[TableBorder] {
138 if page < self.table_borders.len() {
139 &self.table_borders[page]
140 } else {
141 &[]
142 }
143 }
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// A border added to page 0 of a five-page collection is visible on page 0 only;
    /// an untouched page and an out-of-range page both report no borders.
    #[test]
    fn test_table_borders_collection() {
        // Fixture: a two-by-two table with no row data attached.
        let sample = TableBorder {
            bbox: BoundingBox::new(Some(1), 10.0, 10.0, 200.0, 300.0),
            index: None,
            level: None,
            x_coordinates: vec![10.0, 100.0, 200.0],
            x_widths: vec![1.0, 1.0, 1.0],
            y_coordinates: vec![10.0, 150.0, 300.0],
            y_widths: vec![1.0, 1.0, 1.0],
            rows: vec![],
            num_rows: 2,
            num_columns: 2,
            is_bad_table: false,
            is_table_transformer: false,
            previous_table: None,
            next_table: None,
        };

        let mut borders = TableBordersCollection::new(5);
        borders.add(0, sample);

        // (page, expected number of borders on that page)
        for (page, expected) in [(0, 1), (1, 0), (10, 0)] {
            assert_eq!(borders.get_page(page).len(), expected);
        }
    }
}