1use serde::{Deserialize, Serialize};
4
5#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
9pub struct BoundingBox {
10 pub x: f64,
12 pub y: f64,
14 pub width: f64,
16 pub height: f64,
18}
19
20impl BoundingBox {
21 pub fn new(x: f64, y: f64, width: f64, height: f64) -> Self {
23 Self {
24 x,
25 y,
26 width,
27 height,
28 }
29 }
30
31 pub fn right(&self) -> f64 {
33 self.x + self.width
34 }
35
36 pub fn top(&self) -> f64 {
38 self.y + self.height
39 }
40
41 pub fn contains(&self, x: f64, y: f64) -> bool {
43 x >= self.x && x <= self.right() && y >= self.y && y <= self.top()
44 }
45}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct Table {
53 pub rows: Vec<Row>,
55 pub columns: Vec<Column>,
57 pub bounding_box: BoundingBox,
59 pub confidence: f64,
63}
64
65impl Table {
66 pub fn new(
68 rows: Vec<Row>,
69 columns: Vec<Column>,
70 bounding_box: BoundingBox,
71 confidence: f64,
72 ) -> Self {
73 Self {
74 rows,
75 columns,
76 bounding_box,
77 confidence,
78 }
79 }
80
81 pub fn row_count(&self) -> usize {
83 self.rows.len()
84 }
85
86 pub fn column_count(&self) -> usize {
88 self.columns.len()
89 }
90
91 pub fn get_cell(&self, row_idx: usize, col_idx: usize) -> Option<&Cell> {
93 self.rows.get(row_idx)?.cells.get(col_idx)
94 }
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct Row {
100 pub cells: Vec<Cell>,
102 pub y_position: f64,
104 pub height: f64,
106}
107
108impl Row {
109 pub fn new(cells: Vec<Cell>, y_position: f64, height: f64) -> Self {
111 Self {
112 cells,
113 y_position,
114 height,
115 }
116 }
117}
118
119#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct Cell {
122 pub text: String,
124 pub column_index: usize,
126 pub bounding_box: BoundingBox,
128}
129
130impl Cell {
131 pub fn new(column_index: usize, bounding_box: BoundingBox) -> Self {
133 Self {
134 text: String::new(),
135 column_index,
136 bounding_box,
137 }
138 }
139
140 pub fn add_text(&mut self, text: &str) {
142 if !self.text.is_empty() {
143 self.text.push(' ');
144 }
145 self.text.push_str(text);
146 }
147
148 pub fn is_empty(&self) -> bool {
150 self.text.trim().is_empty()
151 }
152}
153
154#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct Column {
157 pub x_position: f64,
159 pub width: f64,
161 pub alignment: Alignment,
163}
164
165impl Column {
166 pub fn new(x_position: f64, width: f64, alignment: Alignment) -> Self {
168 Self {
169 x_position,
170 width,
171 alignment,
172 }
173 }
174
175 pub fn left(&self) -> f64 {
177 self.x_position - self.width / 2.0
178 }
179
180 pub fn right(&self) -> f64 {
182 self.x_position + self.width / 2.0
183 }
184}
185
186#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
188pub enum Alignment {
189 Left,
191 Right,
193 Center,
195 Justified,
197}
198
199impl Default for Alignment {
200 fn default() -> Self {
201 Alignment::Left
202 }
203}
204
205#[derive(Debug, Clone, Serialize, Deserialize)]
209pub struct KeyValuePair {
210 pub key: String,
212 pub value: String,
214 pub confidence: f64,
216 pub pattern: KeyValuePattern,
218}
219
220impl KeyValuePair {
221 pub fn new(key: String, value: String, confidence: f64, pattern: KeyValuePattern) -> Self {
223 Self {
224 key,
225 value,
226 confidence,
227 pattern,
228 }
229 }
230}
231
/// Pattern tag stored in `KeyValuePair::pattern`.
///
/// NOTE(review): the per-variant descriptions are inferred from the variant names only;
/// the code that assigns them is outside this file — confirm against the detector.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum KeyValuePattern {
    /// Presumably a pair written with a colon between key and value — TODO confirm.
    ColonSeparated,
    /// Presumably a pair inferred from how key and value are positioned — TODO confirm.
    SpatialAlignment,
    /// Presumably a pair taken from a table-like layout — TODO confirm.
    Tabular,
}
242
/// Tunable settings for structured-data detection.
///
/// [`Default`] supplies the baseline values listed on each field. The `with_*` methods
/// override one setting each and return the updated configuration, so they can be
/// chained. How each value is applied is decided by the detection code, which lives
/// outside this file.
#[derive(Debug, Clone)]
pub struct StructuredDataConfig {
    /// Minimum number of rows for a table (default `2`).
    pub min_table_rows: usize,
    /// Minimum number of columns for a table (default `2`).
    pub min_table_columns: usize,
    /// Tolerance used when aligning columns (default `5.0`).
    pub column_alignment_tolerance: f64,
    /// Tolerance used when aligning rows (default `3.0`).
    pub row_alignment_tolerance: f64,
    /// Whether table detection is enabled (default `true`).
    pub detect_tables: bool,
    /// Whether key/value detection is enabled (default `true`).
    pub detect_key_value: bool,
    /// Whether multi-column detection is enabled (default `true`).
    pub detect_multi_column: bool,
    /// Minimum gap between columns (default `20.0`).
    pub min_column_gap: f64,
}

impl Default for StructuredDataConfig {
    /// Returns the baseline configuration: every detector enabled, tables of at least
    /// 2x2, tolerances of `5.0` (columns) and `3.0` (rows), and a minimum gap of `20.0`.
    fn default() -> Self {
        Self {
            min_table_rows: 2,
            min_table_columns: 2,
            column_alignment_tolerance: 5.0,
            row_alignment_tolerance: 3.0,
            detect_tables: true,
            detect_key_value: true,
            detect_multi_column: true,
            min_column_gap: 20.0,
        }
    }
}

impl StructuredDataConfig {
    /// Creates a configuration holding the default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets `min_table_rows` and returns the updated configuration.
    #[must_use]
    pub fn with_min_table_rows(mut self, rows: usize) -> Self {
        self.min_table_rows = rows;
        self
    }

    /// Sets `min_table_columns` and returns the updated configuration.
    #[must_use]
    pub fn with_min_table_columns(mut self, columns: usize) -> Self {
        self.min_table_columns = columns;
        self
    }

    /// Sets `column_alignment_tolerance` and returns the updated configuration.
    #[must_use]
    pub fn with_column_tolerance(mut self, tolerance: f64) -> Self {
        self.column_alignment_tolerance = tolerance;
        self
    }

    /// Sets `row_alignment_tolerance` and returns the updated configuration.
    #[must_use]
    pub fn with_row_tolerance(mut self, tolerance: f64) -> Self {
        self.row_alignment_tolerance = tolerance;
        self
    }

    /// Enables or disables table detection and returns the updated configuration.
    #[must_use]
    pub fn with_table_detection(mut self, enabled: bool) -> Self {
        self.detect_tables = enabled;
        self
    }

    /// Enables or disables key/value detection and returns the updated configuration.
    #[must_use]
    pub fn with_key_value_detection(mut self, enabled: bool) -> Self {
        self.detect_key_value = enabled;
        self
    }

    /// Enables or disables multi-column detection and returns the updated configuration.
    #[must_use]
    pub fn with_multi_column_detection(mut self, enabled: bool) -> Self {
        self.detect_multi_column = enabled;
        self
    }

    /// Sets `min_column_gap` and returns the updated configuration.
    ///
    /// Provided so that every tunable field can be set through the builder chain.
    #[must_use]
    pub fn with_min_column_gap(mut self, gap: f64) -> Self {
        self.min_column_gap = gap;
        self
    }
}
329
330#[derive(Debug, Clone, Serialize, Deserialize)]
332pub struct ColumnBoundary {
333 pub x_position: f64,
335 pub gap_width: f64,
337}
338
339impl ColumnBoundary {
340 pub fn new(x_position: f64, gap_width: f64) -> Self {
342 Self {
343 x_position,
344 gap_width,
345 }
346 }
347}
348
349#[derive(Debug, Clone, Serialize, Deserialize)]
351pub struct ColumnSection {
352 pub column_index: usize,
354 pub text: String,
356 pub bounding_box: BoundingBox,
358}
359
360impl ColumnSection {
361 pub fn new(column_index: usize, text: String, bounding_box: BoundingBox) -> Self {
363 Self {
364 column_index,
365 text,
366 bounding_box,
367 }
368 }
369}
370
371#[derive(Debug, Clone, Serialize, Deserialize)]
373pub struct StructuredDataResult {
374 pub tables: Vec<Table>,
376 pub key_value_pairs: Vec<KeyValuePair>,
378 pub column_sections: Vec<ColumnSection>,
380}
381
382impl StructuredDataResult {
383 pub fn new() -> Self {
385 Self {
386 tables: Vec::new(),
387 key_value_pairs: Vec::new(),
388 column_sections: Vec::new(),
389 }
390 }
391}
392
393impl Default for StructuredDataResult {
394 fn default() -> Self {
395 Self::new()
396 }
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// Constructor stores its arguments and the derived edges are origin + extent.
    #[test]
    fn test_bounding_box_basic() {
        let bbox = BoundingBox::new(10.0, 20.0, 100.0, 50.0);
        let expected = BoundingBox {
            x: 10.0,
            y: 20.0,
            width: 100.0,
            height: 50.0,
        };
        assert_eq!(bbox, expected);
        assert_eq!(bbox.right(), 110.0);
        assert_eq!(bbox.top(), 70.0);
    }

    /// Interior and border points are inside; points left or right of the box are not.
    #[test]
    fn test_bounding_box_contains() {
        let bbox = BoundingBox::new(10.0, 20.0, 100.0, 50.0);

        // Interior point, origin corner, far corner.
        for (x, y) in [(50.0, 40.0), (10.0, 20.0), (110.0, 70.0)] {
            assert!(bbox.contains(x, y));
        }

        // Left of the box, right of the box.
        for (x, y) in [(5.0, 40.0), (120.0, 40.0)] {
            assert!(!bbox.contains(x, y));
        }
    }

    /// A new cell is empty; fragments are joined with a single space.
    #[test]
    fn test_cell_operations() {
        let mut cell = Cell::new(0, BoundingBox::new(0.0, 0.0, 50.0, 20.0));
        assert!(cell.is_empty());

        cell.add_text("Hello");
        assert_eq!(cell.text, "Hello");
        assert!(!cell.is_empty());

        cell.add_text("World");
        assert_eq!(cell.text, "Hello World");
    }

    /// Edges sit half a width on either side of `x_position`.
    #[test]
    fn test_column_edges() {
        let column = Column::new(100.0, 50.0, Alignment::Left);
        let edges = (column.left(), column.right());
        assert_eq!(edges.0, 75.0);
        assert_eq!(edges.1, 125.0);
    }

    /// Counts reflect the vectors; `get_cell` is `None` past the last row.
    #[test]
    fn test_table_accessors() {
        let rows = vec![Row::new(
            vec![Cell::new(0, BoundingBox::new(0.0, 0.0, 50.0, 25.0))],
            0.0,
            25.0,
        )];
        let columns = vec![Column::new(25.0, 50.0, Alignment::Left)];
        let table = Table::new(rows, columns, BoundingBox::new(0.0, 0.0, 200.0, 100.0), 0.95);

        assert_eq!(table.row_count(), 1);
        assert_eq!(table.column_count(), 1);
        assert!(table.get_cell(0, 0).is_some());
        assert!(table.get_cell(1, 0).is_none());
    }

    /// Chained builder calls override exactly the targeted settings.
    #[test]
    fn test_config_builder() {
        let base = StructuredDataConfig::new();
        let sized = base.with_min_table_rows(3).with_min_table_columns(4);
        let config = sized.with_column_tolerance(10.0).with_table_detection(false);

        assert_eq!(config.min_table_rows, 3);
        assert_eq!(config.min_table_columns, 4);
        assert_eq!(config.column_alignment_tolerance, 10.0);
        assert!(!config.detect_tables);
    }

    /// `Alignment` defaults to `Left`.
    #[test]
    fn test_alignment_default() {
        let default_alignment = Alignment::default();
        assert_eq!(default_alignment, Alignment::Left);
    }
}