Skip to main content

edgeparse_core/models/
enums.rs

1//! Enumerations for EdgeParse data models.
2
3use serde::{Deserialize, Serialize};
4
/// Semantic type classification for PDF elements.
///
/// This is a fieldless enum deriving `Copy`, `Eq` and `Hash`, so values can be
/// passed by value, compared, and used as keys in hashed collections. It also
/// derives serde's `Serialize`/`Deserialize`; no serde attributes are applied
/// in this definition.
///
/// Several names are easy to confuse: [`SemanticType::TableHeaders`] is a
/// table section while [`SemanticType::TableHeader`] is a single cell, and
/// [`SemanticType::Header`] is a page header while [`SemanticType::Heading`]
/// is a heading.
///
/// NOTE(review): the variant names look like they mirror PDF structure-element
/// tags — confirm against the code that produces these values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SemanticType {
    /// Document root
    Document,
    /// Generic division; reported as ignored by
    /// [`SemanticType::is_ignored_standard_type`].
    Div,
    /// Text paragraph
    Paragraph,
    /// Inline span; reported as ignored by
    /// [`SemanticType::is_ignored_standard_type`].
    Span,
    /// Table element
    Table,
    /// Table headers section (a section of the table, not a single cell;
    /// see [`SemanticType::TableHeader`] for the cell)
    TableHeaders,
    /// Table footer section
    TableFooter,
    /// Table body section
    TableBody,
    /// Table row
    TableRow,
    /// Table header cell (a single cell, not the whole headers section;
    /// see [`SemanticType::TableHeaders`] for the section)
    TableHeader,
    /// Table data cell
    TableCell,
    /// Form element; reported as ignored by
    /// [`SemanticType::is_ignored_standard_type`].
    Form,
    /// Hyperlink; reported as ignored by
    /// [`SemanticType::is_ignored_standard_type`].
    Link,
    /// Annotation; reported as ignored by
    /// [`SemanticType::is_ignored_standard_type`].
    Annot,
    /// Caption for image or table
    Caption,
    /// List container
    List,
    /// List item label
    ListLabel,
    /// List item body
    ListBody,
    /// List item
    ListItem,
    /// Table of contents
    TableOfContent,
    /// Table of contents item
    TableOfContentItem,
    /// Figure/image
    Figure,
    /// Numbered heading
    NumberHeading,
    /// Heading (distinct from the page header, [`SemanticType::Header`])
    Heading,
    /// Title
    Title,
    /// Block quote
    BlockQuote,
    /// Footnote/endnote
    Note,
    /// Page header (distinct from a heading, [`SemanticType::Heading`])
    Header,
    /// Page footer
    Footer,
    /// Code block
    Code,
    /// Part/section
    Part,
}
71
72impl SemanticType {
73    /// Whether this type should be ignored in normal processing.
74    pub fn is_ignored_standard_type(&self) -> bool {
75        matches!(
76            self,
77            SemanticType::Div
78                | SemanticType::Span
79                | SemanticType::Form
80                | SemanticType::Link
81                | SemanticType::Annot
82        )
83    }
84}
85
/// Text alignment within a block.
///
/// Fieldless `Copy` enum with equality and serde `Serialize`/`Deserialize`
/// derives. No `Default` is derived, so callers must always pick an alignment
/// explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TextAlignment {
    /// Left-aligned
    Left,
    /// Right-aligned
    Right,
    /// Center-aligned
    Center,
    /// Justified
    Justify,
}
98
/// Text format (baseline position).
///
/// Fieldless `Copy` enum with equality and serde `Serialize`/`Deserialize`
/// derives. `TextFormat::default()` is [`TextFormat::Normal`], selected by
/// the `#[default]` attribute below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum TextFormat {
    /// Normal baseline (the default variant)
    #[default]
    Normal,
    /// Superscript
    Superscript,
    /// Subscript
    Subscript,
}
110
/// Text type classification.
///
/// Fieldless `Copy` enum with equality and serde `Serialize`/`Deserialize`
/// derives. `TextType::default()` is [`TextType::Regular`], selected by the
/// `#[default]` attribute below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum TextType {
    /// Regular text (the default variant)
    #[default]
    Regular,
    /// Large text
    Large,
    /// Logo/title text
    Logo,
}
122
/// Processing layer that produced/modified an element.
///
/// Fieldless `Copy` enum with equality and serde `Serialize`/`Deserialize`
/// derives. `PdfLayer::default()` is [`PdfLayer::Main`], selected by the
/// `#[default]` attribute below.
///
/// NOTE(review): the declaration order presumably follows the pipeline's
/// stage order — confirm against the code that assigns these layers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum PdfLayer {
    /// Main content layer (initial extraction); the default variant
    #[default]
    Main,
    /// Raw content extraction
    Content,
    /// Table cell assignment
    TableCells,
    /// List item detection
    ListItems,
    /// Table content processing
    TableContent,
    /// List content processing
    ListContent,
    /// Text block processing
    TextBlockContent,
    /// Header and footer processing
    HeaderAndFooterContent,
}
144
/// Triage decision for hybrid mode.
///
/// Fieldless `Copy` enum with equality and serde `Serialize`/`Deserialize`
/// derives. No `Default` is derived, so a decision must always be chosen
/// explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TriageDecision {
    /// Process locally (Rust pipeline)
    Local,
    /// Send to backend
    Backend,
    /// Use both and merge
    Both,
}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// All five ignored variants must report `true`, and a sample of content
    /// variants must report `false`.
    #[test]
    fn test_semantic_type_ignored() {
        let ignored = [
            SemanticType::Div,
            SemanticType::Span,
            SemanticType::Form,
            SemanticType::Link,
            SemanticType::Annot,
        ];
        for ty in ignored {
            assert!(
                ty.is_ignored_standard_type(),
                "{ty:?} should be an ignored standard type"
            );
        }

        let kept = [
            SemanticType::Paragraph,
            SemanticType::Heading,
            SemanticType::Table,
        ];
        for ty in kept {
            assert!(
                !ty.is_ignored_standard_type(),
                "{ty:?} should not be an ignored standard type"
            );
        }
    }

    /// `TextFormat` defaults to the normal baseline.
    #[test]
    fn test_text_format_default() {
        assert_eq!(TextFormat::default(), TextFormat::Normal);
    }

    /// `TextType` defaults to regular text.
    #[test]
    fn test_text_type_default() {
        assert_eq!(TextType::default(), TextType::Regular);
    }

    /// `PdfLayer` defaults to the main content layer.
    #[test]
    fn test_pdf_layer_default() {
        assert_eq!(PdfLayer::default(), PdfLayer::Main);
    }
}