// edgeparse_core/models/content.rs

//! Unified `ContentElement` enum — all page content.
2
use std::convert::TryFrom;

use serde::{Deserialize, Serialize};

use super::bbox::BoundingBox;
use super::chunks::{ImageChunk, LineArtChunk, LineChunk, TextChunk};
use super::list::PDFList;
use super::semantic::{
    SemanticCaption, SemanticFigure, SemanticFormula, SemanticHeaderOrFooter, SemanticHeading,
    SemanticNumberHeading, SemanticParagraph, SemanticPicture, SemanticTable,
};
use super::table::TableBorder;
use super::text::{TextBlock, TextLine};
14
15/// Unified enum for all content elements on a page.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub enum ContentElement {
18    /// Raw text chunk
19    TextChunk(TextChunk),
20    /// Grouped text line
21    TextLine(TextLine),
22    /// Grouped text block
23    TextBlock(TextBlock),
24    /// Image bounding box
25    Image(ImageChunk),
26    /// Line segment
27    Line(LineChunk),
28    /// Vector graphic
29    LineArt(LineArtChunk),
30    /// Table border structure
31    TableBorder(TableBorder),
32    /// List
33    List(PDFList),
34    /// Paragraph
35    Paragraph(SemanticParagraph),
36    /// Heading
37    Heading(SemanticHeading),
38    /// Numbered heading
39    NumberHeading(SemanticNumberHeading),
40    /// Caption
41    Caption(SemanticCaption),
42    /// Header or footer
43    HeaderFooter(SemanticHeaderOrFooter),
44    /// Figure
45    Figure(SemanticFigure),
46    /// Formula
47    Formula(SemanticFormula),
48    /// Picture with description
49    Picture(SemanticPicture),
50    /// Table (semantic wrapper)
51    Table(SemanticTable),
52}
53
54impl ContentElement {
55    /// Get the bounding box of this element.
56    pub fn bbox(&self) -> &BoundingBox {
57        match self {
58            Self::TextChunk(e) => &e.bbox,
59            Self::TextLine(e) => &e.bbox,
60            Self::TextBlock(e) => &e.bbox,
61            Self::Image(e) => &e.bbox,
62            Self::Line(e) => &e.bbox,
63            Self::LineArt(e) => &e.bbox,
64            Self::TableBorder(e) => &e.bbox,
65            Self::List(e) => &e.bbox,
66            Self::Paragraph(e) => &e.base.bbox,
67            Self::Heading(e) => &e.base.base.bbox,
68            Self::NumberHeading(e) => &e.base.base.base.bbox,
69            Self::Caption(e) => &e.base.bbox,
70            Self::HeaderFooter(e) => &e.bbox,
71            Self::Figure(e) => &e.bbox,
72            Self::Formula(e) => &e.bbox,
73            Self::Picture(e) => &e.bbox,
74            Self::Table(e) => &e.bbox,
75        }
76    }
77
78    /// Get the global index.
79    pub fn index(&self) -> Option<u32> {
80        match self {
81            Self::TextChunk(e) => e.index.map(|i| i as u32),
82            Self::TextLine(e) => e.index,
83            Self::TextBlock(e) => e.index,
84            Self::Image(e) => e.index,
85            Self::Line(e) => e.index,
86            Self::LineArt(e) => e.index,
87            Self::TableBorder(e) => e.index,
88            Self::List(e) => e.index,
89            Self::Paragraph(e) => e.base.index,
90            Self::Heading(e) => e.base.base.index,
91            Self::NumberHeading(e) => e.base.base.base.index,
92            Self::Caption(e) => e.base.index,
93            Self::HeaderFooter(e) => e.index,
94            Self::Figure(e) => e.index,
95            Self::Formula(e) => e.index,
96            Self::Picture(e) => e.index,
97            Self::Table(e) => e.index,
98        }
99    }
100
101    /// Get the page number.
102    pub fn page_number(&self) -> Option<u32> {
103        self.bbox().page_number
104    }
105
106    /// Set the global index.
107    pub fn set_index(&mut self, idx: u32) {
108        match self {
109            Self::TextChunk(e) => e.index = Some(idx as usize),
110            Self::TextLine(e) => e.index = Some(idx),
111            Self::TextBlock(e) => e.index = Some(idx),
112            Self::Image(e) => e.index = Some(idx),
113            Self::Line(e) => e.index = Some(idx),
114            Self::LineArt(e) => e.index = Some(idx),
115            Self::TableBorder(e) => e.index = Some(idx),
116            Self::List(e) => e.index = Some(idx),
117            Self::Paragraph(e) => e.base.index = Some(idx),
118            Self::Heading(e) => e.base.base.index = Some(idx),
119            Self::NumberHeading(e) => e.base.base.base.index = Some(idx),
120            Self::Caption(e) => e.base.index = Some(idx),
121            Self::HeaderFooter(e) => e.index = Some(idx),
122            Self::Figure(e) => e.index = Some(idx),
123            Self::Formula(e) => e.index = Some(idx),
124            Self::Picture(e) => e.index = Some(idx),
125            Self::Table(e) => e.index = Some(idx),
126        }
127    }
128}