oxidize_pdf/semantic/
marking.rs1use super::{Entity, EntityMetadata, EntityType};
4use crate::page::Page;
5
6pub struct EntityBuilder<'a> {
8 _page: &'a mut Page,
9 entity_type: EntityType,
10 bounds: (f64, f64, f64, f64),
11 metadata: EntityMetadata,
12}
13
14impl<'a> EntityBuilder<'a> {
15 pub(crate) fn new(
16 page: &'a mut Page,
17 entity_type: EntityType,
18 bounds: (f64, f64, f64, f64),
19 ) -> Self {
20 Self {
21 _page: page,
22 entity_type,
23 bounds,
24 metadata: EntityMetadata::new(),
25 }
26 }
27
28 pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
30 self.metadata = self.metadata.with_property(key, value);
31 self
32 }
33
34 pub fn with_confidence(mut self, confidence: f32) -> Self {
36 self.metadata = self.metadata.with_confidence(confidence);
37 self
38 }
39
40 pub fn with_schema(mut self, schema: impl Into<String>) -> Self {
42 self.metadata = self.metadata.with_schema(schema);
43 self
44 }
45
46 pub fn build(self) -> String {
48 let id = format!("entity_{}", uuid_simple());
49 let _entity = Entity {
50 id: id.clone(),
51 entity_type: self.entity_type,
52 bounds: self.bounds,
53 page: 0, metadata: self.metadata,
55 };
56
57 id
61 }
62}
63
64pub struct SemanticMarker<'a> {
66 page: &'a mut Page,
67}
68
69impl<'a> SemanticMarker<'a> {
70 pub fn new(page: &'a mut Page) -> Self {
71 Self { page }
72 }
73
74 #[allow(mismatched_lifetime_syntaxes)]
76 pub fn mark(&mut self, entity_type: EntityType, bounds: (f64, f64, f64, f64)) -> EntityBuilder {
77 EntityBuilder::new(self.page, entity_type, bounds)
78 }
79
80 #[allow(mismatched_lifetime_syntaxes)]
82 pub fn mark_text(&mut self, bounds: (f64, f64, f64, f64)) -> EntityBuilder {
83 self.mark(EntityType::Text, bounds)
84 }
85
86 #[allow(mismatched_lifetime_syntaxes)]
88 pub fn mark_image(&mut self, bounds: (f64, f64, f64, f64)) -> EntityBuilder {
89 self.mark(EntityType::Image, bounds)
90 }
91
92 #[allow(mismatched_lifetime_syntaxes)]
94 pub fn mark_table(&mut self, bounds: (f64, f64, f64, f64)) -> EntityBuilder {
95 self.mark(EntityType::Table, bounds)
96 }
97}
98
/// Returns a lowercase hexadecimal identifier that is distinct for every call
/// made by the current process.
///
/// The identifier is the nanoseconds elapsed since the Unix epoch, in hex,
/// followed by a 16-digit zero-padded hex sequence number. The timestamp
/// alone is not enough: two calls inside one clock tick, or any calls made
/// while the system clock reads earlier than the epoch (the timestamp then
/// falls back to zero), would otherwise produce identical strings.
///
/// Uniqueness holds until the sequence counter wraps around `usize::MAX`.
/// This is not an RFC 4122 UUID and is not suitable as a secret.
pub fn uuid_simple() -> String {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Process-wide sequence number; `usize` atomics exist on every target.
    static SEQUENCE: AtomicUsize = AtomicUsize::new(0);

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_nanos();

    // Relaxed is sufficient: only the atomicity of the increment matters,
    // no other memory is synchronised through this counter.
    let sequence = SEQUENCE.fetch_add(1, Ordering::Relaxed);

    // The fixed-width suffix keeps the timestamp/sequence split unambiguous,
    // so distinct (timestamp, sequence) pairs never format to the same string.
    format!("{:x}{:016x}", nanos, sequence)
}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `id` is non-empty and consists only of ASCII hex digits.
    fn assert_hex_id(id: &str) {
        assert!(!id.is_empty(), "id must not be empty");
        assert!(
            id.chars().all(|c| c.is_ascii_hexdigit()),
            "id {id:?} contains a non-hex character"
        );
    }

    #[test]
    fn test_uuid_simple_generates_unique_ids() {
        let id1 = uuid_simple();
        let id2 = uuid_simple();

        // Only the format of each id is checked; this test does not assert
        // that the two ids differ.
        assert_hex_id(&id1);
        assert_hex_id(&id2);
    }

    #[test]
    fn test_uuid_simple_format() {
        assert_hex_id(&uuid_simple());
    }

    #[test]
    fn test_entity_metadata_new() {
        let metadata = EntityMetadata::new();

        assert!(metadata.properties.is_empty());
        assert!(metadata.confidence.is_none());
        assert!(metadata.schema.is_none());
    }

    #[test]
    fn test_entity_metadata_with_property() {
        let metadata = EntityMetadata::new()
            .with_property("key1", "value1")
            .with_property("key2", "value2");

        assert_eq!(metadata.properties.len(), 2);
        assert_eq!(
            metadata.properties.get("key1").map(String::as_str),
            Some("value1")
        );
        assert_eq!(
            metadata.properties.get("key2").map(String::as_str),
            Some("value2")
        );
    }

    #[test]
    fn test_entity_metadata_with_confidence() {
        let metadata = EntityMetadata::new().with_confidence(0.95);

        assert_eq!(metadata.confidence, Some(0.95));
    }

    #[test]
    fn test_entity_metadata_with_schema() {
        let metadata = EntityMetadata::new().with_schema("https://schema.org/Person");

        assert_eq!(metadata.schema.as_deref(), Some("https://schema.org/Person"));
    }

    #[test]
    fn test_entity_metadata_chaining() {
        let metadata = EntityMetadata::new()
            .with_property("name", "Test Entity")
            .with_confidence(0.85)
            .with_schema("https://example.com/schema");

        assert_eq!(
            metadata.properties.get("name").map(String::as_str),
            Some("Test Entity")
        );
        assert_eq!(metadata.confidence, Some(0.85));
        assert_eq!(
            metadata.schema.as_deref(),
            Some("https://example.com/schema")
        );
    }

    #[test]
    fn test_entity_type_variants() {
        // Compile-time check that these variants exist; constructing them is
        // the whole test, so no runtime assertion is needed.
        let _variants = [EntityType::Text, EntityType::Image, EntityType::Table];
    }

    #[test]
    fn test_entity_creation() {
        let entity = Entity {
            id: "test_entity_1".to_string(),
            entity_type: EntityType::Text,
            bounds: (10.0, 20.0, 100.0, 50.0),
            page: 1,
            metadata: EntityMetadata::new().with_confidence(0.9),
        };

        assert_eq!(entity.id, "test_entity_1");
        assert!(matches!(entity.entity_type, EntityType::Text));
        assert_eq!(entity.bounds, (10.0, 20.0, 100.0, 50.0));
        assert_eq!(entity.page, 1);
        assert_eq!(entity.metadata.confidence, Some(0.9));
    }
}