use serde::{Deserialize, Serialize};
/// Axis-aligned rectangle described by an origin point and a size.
///
/// The far edges are derived by `right()` (`x + width`) and `top()`
/// (`y + height`).
/// NOTE(review): `top = y + height` suggests a y-up coordinate system with
/// `(x, y)` as the lower-left corner — confirm against whatever produces
/// these boxes.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BoundingBox {
/// X coordinate of the box origin; `right()` adds `width` to this.
pub x: f64,
/// Y coordinate of the box origin; `top()` adds `height` to this.
pub y: f64,
/// Horizontal extent of the box.
pub width: f64,
/// Vertical extent of the box.
pub height: f64,
}
impl BoundingBox {
    /// Builds a box from its origin `(x, y)` and its `width` by `height` size.
    ///
    /// The values are stored exactly as given; nothing is validated.
    pub fn new(x: f64, y: f64, width: f64, height: f64) -> Self {
        Self {
            x,
            y,
            width,
            height,
        }
    }

    /// Returns the x coordinate of the far horizontal edge, `x + width`.
    pub fn right(&self) -> f64 {
        self.x + self.width
    }

    /// Returns the y coordinate of the far vertical edge, `y + height`.
    pub fn top(&self) -> f64 {
        self.y + self.height
    }

    /// Reports whether the point `(x, y)` lies inside the box.
    ///
    /// All four edges are inclusive, so points exactly on the border count as
    /// contained. Any NaN coordinate yields `false`.
    pub fn contains(&self, x: f64, y: f64) -> bool {
        let within_x = (self.x..=self.right()).contains(&x);
        let within_y = (self.y..=self.top()).contains(&y);
        within_x && within_y
    }
}
/// A table made of rows, column descriptors, an overall bounding box and a
/// confidence score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Table {
/// Rows of the table; `row_count()` reports this vector's length and
/// `get_cell()` indexes into it first.
pub rows: Vec<Row>,
/// Column descriptors; `column_count()` reports this vector's length.
pub columns: Vec<Column>,
/// Rectangle associated with the table as a whole.
pub bounding_box: BoundingBox,
/// Confidence score attached to the table.
/// NOTE(review): presumably in `0.0..=1.0` — confirm with the producer.
pub confidence: f64,
}
impl Table {
    /// Assembles a table from already-built parts.
    ///
    /// Nothing is cross-checked: `rows` and `columns` are stored as given even
    /// if their sizes disagree.
    pub fn new(
        rows: Vec<Row>,
        columns: Vec<Column>,
        bounding_box: BoundingBox,
        confidence: f64,
    ) -> Self {
        Self {
            rows,
            columns,
            bounding_box,
            confidence,
        }
    }

    /// Number of entries in `rows`.
    pub fn row_count(&self) -> usize {
        self.rows.len()
    }

    /// Number of entries in `columns` (not the number of cells in any row).
    pub fn column_count(&self) -> usize {
        self.columns.len()
    }

    /// Looks up a cell by row index and by position within that row's `cells`.
    ///
    /// Returns `None` when either index is out of range. `col_idx` is the
    /// position in the row's `cells` vector; `Cell::column_index` is not
    /// consulted.
    pub fn get_cell(&self, row_idx: usize, col_idx: usize) -> Option<&Cell> {
        self.rows
            .get(row_idx)
            .and_then(|row| row.cells.get(col_idx))
    }
}
/// One row of a [`Table`]: its cells plus a vertical position and height.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Row {
/// Cells of this row; `Table::get_cell` indexes into this vector by position.
pub cells: Vec<Cell>,
/// Vertical position of the row.
/// NOTE(review): which edge or baseline this refers to is not visible in
/// this file — confirm with the code that builds rows.
pub y_position: f64,
/// Vertical extent of the row.
pub height: f64,
}
impl Row {
    /// Wraps `cells` together with the row's vertical position and height.
    ///
    /// The arguments are stored unchanged; no ordering or geometry checks are
    /// made.
    pub fn new(cells: Vec<Cell>, y_position: f64, height: f64) -> Self {
        Self {
            cells,
            y_position,
            height,
        }
    }
}
/// A single table cell: accumulated text, a column index and a bounding box.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Cell {
/// Text content; starts empty in `Cell::new` and grows through `add_text`.
pub text: String,
/// Index of the column this cell is assigned to.
pub column_index: usize,
/// Rectangle associated with the cell.
pub bounding_box: BoundingBox,
}
impl Cell {
    /// Creates a cell with no text for the given column index and region.
    pub fn new(column_index: usize, bounding_box: BoundingBox) -> Self {
        Self {
            text: String::new(),
            column_index,
            bounding_box,
        }
    }

    /// Appends `text` to the cell, separated from existing content by one space.
    ///
    /// An empty `text` is ignored. Without that guard an empty fragment would
    /// leave a dangling separator behind, and the next append would then
    /// produce a double space.
    pub fn add_text(&mut self, text: &str) {
        if text.is_empty() {
            return;
        }
        if !self.text.is_empty() {
            self.text.push(' ');
        }
        self.text.push_str(text);
    }

    /// Returns `true` when the cell holds no text or only whitespace.
    pub fn is_empty(&self) -> bool {
        self.text.trim().is_empty()
    }
}
/// Descriptor of a table column: horizontal placement, width and alignment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Column {
/// Horizontal center of the column: `left()` and `right()` are computed as
/// `x_position - width / 2` and `x_position + width / 2`.
pub x_position: f64,
/// Full width of the column; half of it lies on each side of `x_position`.
pub width: f64,
/// Alignment recorded for the column.
pub alignment: Alignment,
}
impl Column {
    /// Creates a column centered on `x_position` with the given full `width`.
    pub fn new(x_position: f64, width: f64, alignment: Alignment) -> Self {
        Self {
            x_position,
            width,
            alignment,
        }
    }

    /// Left edge of the column: the center minus half the width.
    pub fn left(&self) -> f64 {
        let half_width = self.width / 2.0;
        self.x_position - half_width
    }

    /// Right edge of the column: the center plus half the width.
    pub fn right(&self) -> f64 {
        let half_width = self.width / 2.0;
        self.x_position + half_width
    }
}
/// Alignment recorded on a [`Column`].
///
/// `Alignment::default()` is [`Alignment::Left`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Alignment {
/// Left-aligned; this is the `Default` value.
Left,
/// Right-aligned.
Right,
/// Centered.
Center,
/// Justified.
Justified,
}
impl Default for Alignment {
fn default() -> Self {
Alignment::Left
}
}
/// A key/value pair together with a confidence score and the pattern that
/// classified it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeyValuePair {
/// The key text.
pub key: String,
/// The value text associated with `key`.
pub value: String,
/// Confidence score attached to the pair.
/// NOTE(review): presumably in `0.0..=1.0` — confirm with the producer.
pub confidence: f64,
/// Which [`KeyValuePattern`] this pair is tagged with.
pub pattern: KeyValuePattern,
}
impl KeyValuePair {
    /// Bundles a key, its value, a confidence score and the matching pattern.
    ///
    /// All arguments are stored as given; the strings are neither trimmed nor
    /// validated.
    pub fn new(key: String, value: String, confidence: f64, pattern: KeyValuePattern) -> Self {
        Self {
            key,
            value,
            confidence,
            pattern,
        }
    }
}
/// Tag recorded on a [`KeyValuePair`] naming the pattern it matched.
///
/// NOTE(review): the matching logic is not in this file; the variant
/// descriptions below are read off the names and should be confirmed against
/// the detector.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum KeyValuePattern {
/// Presumably key and value separated by a colon (e.g. `key: value`).
ColonSeparated,
/// Presumably key and value related by their positions on the page.
SpatialAlignment,
/// Presumably key and value laid out in table-like form.
Tabular,
}
/// Settings for structured-data detection (tables, key/value pairs and
/// multi-column layout).
///
/// Start from [`StructuredDataConfig::new`] (equivalent to `Default`) and
/// adjust individual settings with the chainable `with_*` methods.
///
/// NOTE(review): the detector that consumes these values is not in this
/// file, so units and exact semantics of the thresholds should be confirmed
/// there.
#[derive(Debug, Clone)]
pub struct StructuredDataConfig {
    /// Minimum number of rows for a table (default `2`).
    pub min_table_rows: usize,
    /// Minimum number of columns for a table (default `2`).
    pub min_table_columns: usize,
    /// Tolerance applied to column alignment (default `5.0`).
    pub column_alignment_tolerance: f64,
    /// Tolerance applied to row alignment (default `3.0`).
    pub row_alignment_tolerance: f64,
    /// Whether table detection is enabled (default `true`).
    pub detect_tables: bool,
    /// Whether key/value detection is enabled (default `true`).
    pub detect_key_value: bool,
    /// Whether multi-column detection is enabled (default `true`).
    pub detect_multi_column: bool,
    /// Minimum gap between columns (default `20.0`).
    pub min_column_gap: f64,
}

impl Default for StructuredDataConfig {
    /// Returns the stock configuration: every detector enabled, at least a
    /// 2x2 table, tolerances of `5.0` (columns) and `3.0` (rows), and a
    /// minimum column gap of `20.0`.
    fn default() -> Self {
        Self {
            min_table_rows: 2,
            min_table_columns: 2,
            column_alignment_tolerance: 5.0,
            row_alignment_tolerance: 3.0,
            detect_tables: true,
            detect_key_value: true,
            detect_multi_column: true,
            min_column_gap: 20.0,
        }
    }
}

impl StructuredDataConfig {
    /// Creates a configuration holding the default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets `min_table_rows` and returns the updated configuration.
    pub fn with_min_table_rows(mut self, rows: usize) -> Self {
        self.min_table_rows = rows;
        self
    }

    /// Sets `min_table_columns` and returns the updated configuration.
    pub fn with_min_table_columns(mut self, columns: usize) -> Self {
        self.min_table_columns = columns;
        self
    }

    /// Sets `column_alignment_tolerance` and returns the updated configuration.
    pub fn with_column_tolerance(mut self, tolerance: f64) -> Self {
        self.column_alignment_tolerance = tolerance;
        self
    }

    /// Sets `row_alignment_tolerance` and returns the updated configuration.
    pub fn with_row_tolerance(mut self, tolerance: f64) -> Self {
        self.row_alignment_tolerance = tolerance;
        self
    }

    /// Enables or disables table detection.
    pub fn with_table_detection(mut self, enabled: bool) -> Self {
        self.detect_tables = enabled;
        self
    }

    /// Enables or disables key/value detection.
    pub fn with_key_value_detection(mut self, enabled: bool) -> Self {
        self.detect_key_value = enabled;
        self
    }

    /// Enables or disables multi-column detection.
    pub fn with_multi_column_detection(mut self, enabled: bool) -> Self {
        self.detect_multi_column = enabled;
        self
    }

    /// Sets `min_column_gap` and returns the updated configuration.
    ///
    /// Completes the builder: every other field already had a `with_*`
    /// setter, while this one could only be changed by assigning the field.
    pub fn with_min_column_gap(mut self, gap: f64) -> Self {
        self.min_column_gap = gap;
        self
    }
}
/// A column boundary described by a horizontal position and a gap width.
///
/// NOTE(review): presumably the gutter between adjacent text columns found
/// by multi-column detection — the producing code is not in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ColumnBoundary {
/// Horizontal position of the boundary.
/// NOTE(review): whether this is the gap's center or one of its edges is
/// not visible here — confirm.
pub x_position: f64,
/// Width of the gap at this boundary.
pub gap_width: f64,
}
impl ColumnBoundary {
    /// Records a boundary at `x_position` with the given `gap_width`.
    ///
    /// Both values are stored unchanged.
    pub fn new(x_position: f64, gap_width: f64) -> Self {
        Self {
            x_position,
            gap_width,
        }
    }
}
/// Text belonging to one column, with the column's index and a bounding box.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ColumnSection {
/// Index of the column this section belongs to.
/// NOTE(review): presumably zero-based, left to right — confirm.
pub column_index: usize,
/// Text content of the section.
pub text: String,
/// Rectangle associated with the section.
pub bounding_box: BoundingBox,
}
impl ColumnSection {
    /// Bundles a column index, its text and the region it occupies.
    ///
    /// The arguments are stored unchanged.
    pub fn new(column_index: usize, text: String, bounding_box: BoundingBox) -> Self {
        Self {
            column_index,
            text,
            bounding_box,
        }
    }
}
/// Container for structured data: tables, key/value pairs and column sections.
///
/// `StructuredDataResult::new()` and `Default` both yield three empty vectors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructuredDataResult {
/// Tables held by this result.
pub tables: Vec<Table>,
/// Key/value pairs held by this result.
pub key_value_pairs: Vec<KeyValuePair>,
/// Column sections held by this result.
pub column_sections: Vec<ColumnSection>,
}
impl StructuredDataResult {
    /// Creates a result with no tables, key/value pairs or column sections.
    pub fn new() -> Self {
        Self::default()
    }
}

impl Default for StructuredDataResult {
    /// Every collection starts out empty; identical to what `new()` returns.
    fn default() -> Self {
        Self {
            tables: Vec::new(),
            key_value_pairs: Vec::new(),
            column_sections: Vec::new(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The constructor stores its arguments and the far edges are origin + size.
    #[test]
    fn test_bounding_box_basic() {
        let area = BoundingBox::new(10.0, 20.0, 100.0, 50.0);

        assert_eq!(area.x, 10.0);
        assert_eq!(area.y, 20.0);
        assert_eq!(area.width, 100.0);
        assert_eq!(area.height, 50.0);

        assert_eq!(area.right(), 110.0);
        assert_eq!(area.top(), 70.0);
    }

    /// Containment is inclusive on the edges and rejects points to either side.
    #[test]
    fn test_bounding_box_contains() {
        let area = BoundingBox::new(10.0, 20.0, 100.0, 50.0);

        // An interior point, then the origin corner and the opposite corner.
        for &(px, py) in &[(50.0, 40.0), (10.0, 20.0), (110.0, 70.0)] {
            assert!(area.contains(px, py));
        }

        // One point left of the box and one right of it.
        for &(px, py) in &[(5.0, 40.0), (120.0, 40.0)] {
            assert!(!area.contains(px, py));
        }
    }

    /// A new cell is empty; appended fragments are joined with a single space.
    #[test]
    fn test_cell_operations() {
        let mut cell = Cell::new(0, BoundingBox::new(0.0, 0.0, 50.0, 20.0));
        assert!(cell.is_empty());

        cell.add_text("Hello");
        assert_eq!(cell.text, "Hello");
        assert!(!cell.is_empty());

        cell.add_text("World");
        assert_eq!(cell.text, "Hello World");
    }

    /// Column edges sit half a width to either side of `x_position`.
    #[test]
    fn test_column_edges() {
        let col = Column::new(100.0, 50.0, Alignment::Left);

        assert_eq!(col.left(), 75.0);
        assert_eq!(col.right(), 125.0);
    }

    /// Counts reflect the vectors passed in; out-of-range lookups give `None`.
    #[test]
    fn test_table_accessors() {
        let only_cell = Cell::new(0, BoundingBox::new(0.0, 0.0, 50.0, 25.0));
        let rows = vec![Row::new(vec![only_cell], 0.0, 25.0)];
        let columns = vec![Column::new(25.0, 50.0, Alignment::Left)];
        let table = Table::new(rows, columns, BoundingBox::new(0.0, 0.0, 200.0, 100.0), 0.95);

        assert_eq!(table.row_count(), 1);
        assert_eq!(table.column_count(), 1);
        assert!(table.get_cell(0, 0).is_some());
        assert!(table.get_cell(1, 0).is_none());
    }

    /// Builder methods override exactly the settings they name.
    #[test]
    fn test_config_builder() {
        let cfg = StructuredDataConfig::new()
            .with_min_table_rows(3)
            .with_min_table_columns(4)
            .with_column_tolerance(10.0)
            .with_table_detection(false);

        assert_eq!(cfg.min_table_rows, 3);
        assert_eq!(cfg.min_table_columns, 4);
        assert_eq!(cfg.column_alignment_tolerance, 10.0);
        assert!(!cfg.detect_tables);
    }

    /// `Alignment` defaults to `Left`.
    #[test]
    fn test_alignment_default() {
        assert_eq!(Alignment::default(), Alignment::Left);
    }
}