mod image;
mod path;
mod table;
mod text;
pub use image::{ColorSpace, ImageContent, ImageFormat};
pub use path::{LineCap, LineJoin, PathContent, PathOperation};
pub use table::{
TableCellAlign, TableCellContent, TableCellVAlign, TableContent, TableContentStyle,
TableDetectionInfo, TableRowContent, TableSource,
};
pub use text::{FontSpec, FontStyle, TextContent, TextStyle};
use crate::geometry::Rect;
/// One piece of content, tagged by the kind of payload it carries.
///
/// Every payload carries a `bbox` and a `reading_order`; the methods on this
/// enum forward to whichever payload is present.
#[derive(Debug, Clone)]
pub enum ContentElement {
    /// Wraps a [`TextContent`].
    Text(TextContent),
    /// Wraps an [`ImageContent`].
    Image(ImageContent),
    /// Wraps a [`PathContent`].
    Path(PathContent),
    /// Wraps a [`StructureElement`], which can itself hold child elements.
    Structure(StructureElement),
    /// Wraps a [`TableContent`].
    Table(TableContent),
}

impl ContentElement {
    /// Returns the `bbox` field of the wrapped payload.
    pub fn bbox(&self) -> Rect {
        match self {
            Self::Text(text) => text.bbox,
            Self::Image(image) => image.bbox,
            Self::Path(path) => path.bbox,
            Self::Structure(structure) => structure.bbox,
            Self::Table(table) => table.bbox,
        }
    }

    /// Returns the `reading_order` field of the wrapped payload, or `None`
    /// when the payload has none assigned.
    pub fn reading_order(&self) -> Option<usize> {
        match self {
            Self::Text(text) => text.reading_order,
            Self::Image(image) => image.reading_order,
            Self::Path(path) => path.reading_order,
            Self::Structure(structure) => structure.reading_order,
            Self::Table(table) => table.reading_order,
        }
    }

    /// Returns `true` if this is the [`ContentElement::Text`] variant.
    pub fn is_text(&self) -> bool {
        matches!(self, Self::Text(_))
    }

    /// Returns `true` if this is the [`ContentElement::Image`] variant.
    pub fn is_image(&self) -> bool {
        matches!(self, Self::Image(_))
    }

    /// Returns `true` if this is the [`ContentElement::Path`] variant.
    pub fn is_path(&self) -> bool {
        matches!(self, Self::Path(_))
    }

    /// Returns `true` if this is the [`ContentElement::Structure`] variant.
    ///
    /// Provided so that every variant has a matching `is_*` predicate.
    pub fn is_structure(&self) -> bool {
        matches!(self, Self::Structure(_))
    }

    /// Returns `true` if this is the [`ContentElement::Table`] variant.
    pub fn is_table(&self) -> bool {
        matches!(self, Self::Table(_))
    }

    /// Borrows the payload if this is the [`ContentElement::Text`] variant,
    /// otherwise returns `None`.
    pub fn as_text(&self) -> Option<&TextContent> {
        match self {
            Self::Text(text) => Some(text),
            _ => None,
        }
    }

    /// Borrows the payload if this is the [`ContentElement::Image`] variant,
    /// otherwise returns `None`.
    pub fn as_image(&self) -> Option<&ImageContent> {
        match self {
            Self::Image(image) => Some(image),
            _ => None,
        }
    }

    /// Borrows the payload if this is the [`ContentElement::Path`] variant,
    /// otherwise returns `None`.
    pub fn as_path(&self) -> Option<&PathContent> {
        match self {
            Self::Path(path) => Some(path),
            _ => None,
        }
    }

    /// Borrows the payload if this is the [`ContentElement::Structure`]
    /// variant, otherwise returns `None`.
    ///
    /// Provided so that every variant has a matching `as_*` accessor.
    pub fn as_structure(&self) -> Option<&StructureElement> {
        match self {
            Self::Structure(structure) => Some(structure),
            _ => None,
        }
    }

    /// Borrows the payload if this is the [`ContentElement::Table`] variant,
    /// otherwise returns `None`.
    pub fn as_table(&self) -> Option<&TableContent> {
        match self {
            Self::Table(table) => Some(table),
            _ => None,
        }
    }
}
/// A structure node that groups child [`ContentElement`]s.
#[derive(Debug, Clone)]
pub struct StructureElement {
    /// Type name of this structure node, stored as a free-form string.
    pub structure_type: String,
    /// Bounding box recorded for this node.
    pub bbox: Rect,
    /// Content elements nested under this node.
    pub children: Vec<ContentElement>,
    /// Position in reading order, if one has been assigned.
    pub reading_order: Option<usize>,
    /// Alternative text, if any.
    pub alt_text: Option<String>,
    /// Language, if any.
    pub language: Option<String>,
}

impl Default for StructureElement {
    /// Creates an element with an empty type name, an all-zero bounding box,
    /// no children, and every optional field unset.
    fn default() -> Self {
        let zero_bbox = Rect::new(0.0, 0.0, 0.0, 0.0);
        StructureElement {
            // Optional metadata starts out absent.
            reading_order: None,
            alt_text: None,
            language: None,
            // Required fields start out empty.
            structure_type: String::new(),
            children: vec![],
            bbox: zero_bbox,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `TextContent` with default font and style and with the
    /// artifact type, origin, rotation and matrix all unset, so each test
    /// only spells out the fields it varies.
    fn text_content(text: &str, bbox: Rect, reading_order: Option<usize>) -> TextContent {
        TextContent {
            artifact_type: None,
            text: text.to_string(),
            bbox,
            font: FontSpec::default(),
            style: TextStyle::default(),
            reading_order,
            origin: None,
            rotation_degrees: None,
            matrix: None,
        }
    }

    /// `bbox()` on a text element returns the payload's bounding box.
    #[test]
    fn test_content_element_bbox() {
        let payload = text_content("Test", Rect::new(10.0, 20.0, 50.0, 12.0), Some(0));
        let bounds = ContentElement::Text(payload).bbox();

        assert_eq!((bounds.x, bounds.y), (10.0, 20.0));
        assert_eq!((bounds.width, bounds.height), (50.0, 12.0));
    }

    /// Predicates and accessors agree with the variant actually held.
    #[test]
    fn test_content_element_type_checks() {
        let element =
            ContentElement::Text(text_content("Test", Rect::new(0.0, 0.0, 10.0, 10.0), None));

        // Matches its own variant...
        assert!(element.is_text());
        assert!(element.as_text().is_some());
        // ...and none of the others checked here.
        assert!(!element.is_image());
        assert!(!element.is_path());
        assert!(element.as_image().is_none());
    }

    /// `reading_order()` forwards the value stored on the payload.
    #[test]
    fn test_reading_order() {
        let element =
            ContentElement::Text(text_content("First", Rect::new(0.0, 0.0, 10.0, 10.0), Some(5)));

        assert_eq!(element.reading_order(), Some(5));
    }
}