use serde::{Deserialize, Serialize};
use std::sync::Arc;
use super::extraction::{BoundingBox, ExtractedImage};
use super::serde_helpers::serde_vec_arc;
use super::tables::Table;
/// Top-level description of how a document is split into page-like units.
///
/// `boundaries` and `pages` are both optional; when either is `None` it is
/// left out of the serialized output entirely rather than written as `null`.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct PageStructure {
    /// Number of units in the document.
    ///
    /// NOTE(review): presumably counted in units of `unit_type`; nothing in
    /// this type ties it to the length of `boundaries` or `pages`.
    pub total_count: usize,

    /// Which kind of unit the document is divided into (see [`PageUnitType`]).
    pub unit_type: PageUnitType,

    /// Optional list of per-unit byte ranges; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub boundaries: Option<Vec<PageBoundary>>,

    /// Optional list of per-unit metadata; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pages: Option<Vec<PageInfo>>,
}
/// The kind of unit a document is divided into.
///
/// Variants are (de)serialized using their snake_case names, i.e. `"page"`,
/// `"slide"` and `"sheet"`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub enum PageUnitType {
    /// Serialized as `"page"`.
    Page,
    /// Serialized as `"slide"`.
    Slide,
    /// Serialized as `"sheet"`.
    Sheet,
}
/// A byte range associated with a single page number.
///
/// NOTE(review): the buffer these offsets index into, and whether `byte_end`
/// is inclusive or exclusive, is not defined by this type — confirm against
/// the code that produces and consumes boundaries.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct PageBoundary {
    /// Byte offset at which the page begins.
    pub byte_start: usize,

    /// Byte offset at which the page ends.
    pub byte_end: usize,

    /// Number of the page this range belongs to.
    ///
    /// NOTE(review): indexing base (0 or 1) is not visible here — confirm.
    pub page_number: usize,
}
/// Metadata describing a single page-like unit.
///
/// Only `number` is mandatory. Every other field is an `Option` that is
/// dropped from the serialized output when it is `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct PageInfo {
    /// Number identifying this unit.
    ///
    /// NOTE(review): indexing base (0 or 1) is not visible here — confirm.
    pub number: usize,

    /// Title of the unit, if one is known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,

    /// Two-component size of the unit, if known.
    ///
    /// NOTE(review): presumably `(width, height)`; component order and units
    /// are not established by this declaration — confirm with producers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dimensions: Option<(f64, f64)>,

    /// Count of images on the unit, if known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_count: Option<usize>,

    /// Count of tables on the unit, if known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub table_count: Option<usize>,

    /// Whether the unit is flagged as hidden, if known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hidden: Option<bool>,

    /// Whether the unit is flagged as blank, if known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_blank: Option<bool>,
}
/// Content extracted for a single page, together with its associated
/// tables, images and optional structural annotations.
///
/// Serialization notes:
/// - `tables` and `images` go through the `serde_vec_arc` helper module, are
///   skipped when empty, and fall back to an empty `Vec` when the field is
///   missing from the input.
/// - The `Option` fields are skipped when `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct PageContent {
    /// Number of the page this content belongs to.
    ///
    /// NOTE(review): indexing base (0 or 1) is not visible here — confirm.
    pub page_number: usize,

    /// Text content of the page.
    pub content: String,

    /// Tables attached to this page, held behind `Arc`.
    ///
    /// With the `api` feature the schema presents this as `Vec<Table>`.
    #[serde(default, skip_serializing_if = "Vec::is_empty", with = "serde_vec_arc")]
    #[cfg_attr(feature = "api", schema(value_type = Vec<Table>))]
    pub tables: Vec<Arc<Table>>,

    /// Images attached to this page, held behind `Arc`.
    ///
    /// With the `api` feature the schema presents this as `Vec<ExtractedImage>`.
    #[serde(default, skip_serializing_if = "Vec::is_empty", with = "serde_vec_arc")]
    #[cfg_attr(feature = "api", schema(value_type = Vec<ExtractedImage>))]
    pub images: Vec<Arc<ExtractedImage>>,

    /// Optional block hierarchy for the page (see [`PageHierarchy`]).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hierarchy: Option<PageHierarchy>,

    /// Whether the page is flagged as blank, if known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_blank: Option<bool>,

    /// Optional list of layout regions for the page (see [`LayoutRegion`]).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layout_regions: Option<Vec<LayoutRegion>>,
}
/// A classified region of a page with a location and a confidence value.
///
/// Derives `Default`, so every field (including the `BoundingBox`) starts at
/// its type's default value when constructed via `LayoutRegion::default()`.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct LayoutRegion {
    /// Class label assigned to the region, stored as free-form text.
    pub class: String,

    /// Confidence value for the classification.
    ///
    /// NOTE(review): presumably in `0.0..=1.0`; the range is not enforced
    /// here — confirm with the producer.
    pub confidence: f64,

    /// Location of the region.
    pub bounding_box: BoundingBox,

    /// Fraction of area covered by the region.
    ///
    /// NOTE(review): presumably relative to the page area and in
    /// `0.0..=1.0`; neither is established by this declaration — confirm.
    pub area_fraction: f64,
}
/// Collection of hierarchical text blocks for a page, plus a block count.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct PageHierarchy {
    /// Number of blocks.
    ///
    /// NOTE(review): stored independently of `blocks`; this type does not
    /// enforce that it equals `blocks.len()`.
    pub block_count: usize,

    /// The blocks themselves. Skipped during serialization when empty and
    /// defaulted to an empty `Vec` when absent from the input.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub blocks: Vec<HierarchicalBlock>,
}
/// A single text block with its font size and hierarchy level.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct HierarchicalBlock {
    /// Text of the block.
    pub text: String,

    /// Font size associated with the block.
    ///
    /// NOTE(review): units (e.g. points) are not stated here — confirm.
    pub font_size: f32,

    /// Hierarchy level label, stored as free-form text.
    ///
    /// NOTE(review): the set of valid level strings is not defined by this
    /// type — confirm against the code that assigns levels.
    pub level: String,

    /// Optional four-component bounding box; omitted from output when `None`.
    ///
    /// NOTE(review): component order (e.g. left/top/right/bottom versus
    /// x/y/width/height) is not established here — confirm with producers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bbox: Option<(f32, f32, f32, f32)>,
}