Skip to main content

oxidize_pdf/semantic/
marking.rs

1//! Marking API for semantic regions
2
3use super::{Entity, EntityMetadata, EntityType};
4use crate::page::Page;
5
6/// Builder for creating marked entities
7pub struct EntityBuilder<'a> {
8    _page: &'a mut Page,
9    entity_type: EntityType,
10    bounds: (f64, f64, f64, f64),
11    metadata: EntityMetadata,
12}
13
14impl<'a> EntityBuilder<'a> {
15    pub(crate) fn new(
16        page: &'a mut Page,
17        entity_type: EntityType,
18        bounds: (f64, f64, f64, f64),
19    ) -> Self {
20        Self {
21            _page: page,
22            entity_type,
23            bounds,
24            metadata: EntityMetadata::new(),
25        }
26    }
27
28    /// Add a metadata property
29    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
30        self.metadata = self.metadata.with_property(key, value);
31        self
32    }
33
34    /// Set confidence score
35    pub fn with_confidence(mut self, confidence: f32) -> Self {
36        self.metadata = self.metadata.with_confidence(confidence);
37        self
38    }
39
40    /// Set schema URL
41    pub fn with_schema(mut self, schema: impl Into<String>) -> Self {
42        self.metadata = self.metadata.with_schema(schema);
43        self
44    }
45
46    /// Finalize the entity marking
47    pub fn build(self) -> String {
48        let id = format!("entity_{}", uuid_simple());
49        let _entity = Entity {
50            id: id.clone(),
51            entity_type: self.entity_type,
52            bounds: self.bounds,
53            page: 0, // Will be set by page
54            metadata: self.metadata,
55        };
56
57        // Store entity in page (implementation detail)
58        // self._page.add_entity(_entity);
59
60        id
61    }
62}
63
64/// Semantic marker for a page
65pub struct SemanticMarker<'a> {
66    page: &'a mut Page,
67}
68
69impl<'a> SemanticMarker<'a> {
70    pub fn new(page: &'a mut Page) -> Self {
71        Self { page }
72    }
73
74    /// Mark a region as a specific entity type
75    #[allow(mismatched_lifetime_syntaxes)]
76    pub fn mark(&mut self, entity_type: EntityType, bounds: (f64, f64, f64, f64)) -> EntityBuilder {
77        EntityBuilder::new(self.page, entity_type, bounds)
78    }
79
80    /// Mark text region
81    #[allow(mismatched_lifetime_syntaxes)]
82    pub fn mark_text(&mut self, bounds: (f64, f64, f64, f64)) -> EntityBuilder {
83        self.mark(EntityType::Text, bounds)
84    }
85
86    /// Mark image region
87    #[allow(mismatched_lifetime_syntaxes)]
88    pub fn mark_image(&mut self, bounds: (f64, f64, f64, f64)) -> EntityBuilder {
89        self.mark(EntityType::Image, bounds)
90    }
91
92    /// Mark table region
93    #[allow(mismatched_lifetime_syntaxes)]
94    pub fn mark_table(&mut self, bounds: (f64, f64, f64, f64)) -> EntityBuilder {
95        self.mark(EntityType::Table, bounds)
96    }
97}
98
/// Generates a hexadecimal identifier used as the suffix of entity IDs.
///
/// Despite the name, this is not an RFC 4122 UUID. The result is the time
/// since the Unix epoch in nanoseconds, rendered as lowercase hex, followed
/// by a 16-digit, zero-padded hex sequence number taken from a process-wide
/// counter.
///
/// The sequence number keeps identifiers produced by one process distinct
/// even when two calls observe the same clock reading (coarse clock
/// resolution, or a clock set before the epoch, which is treated as zero).
/// Distinctness holds until the counter wraps around.
///
/// The returned string is never empty and contains only ASCII hex digits.
pub fn uuid_simple() -> String {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Shared by every caller in the process.
    static SEQUENCE: AtomicUsize = AtomicUsize::new(0);

    let timestamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // An error means the clock is before the epoch; use a zero duration.
        .unwrap_or_default()
        .as_nanos();

    // Relaxed is sufficient: only the uniqueness of the fetched value
    // matters, not its ordering relative to other memory operations.
    let sequence = SEQUENCE.fetch_add(1, Ordering::Relaxed);

    // The fixed-width sequence suffix keeps the concatenation unambiguous.
    format!("{:x}{:016x}", timestamp, sequence)
}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `id` is a non-empty string made only of ASCII hex digits.
    fn assert_hex_id(id: &str) {
        assert!(!id.is_empty(), "id must not be empty");
        assert!(
            id.chars().all(|c| c.is_ascii_hexdigit()),
            "id contains a non-hex character: {id:?}"
        );
    }

    #[test]
    fn test_uuid_simple_generates_unique_ids() {
        let id1 = uuid_simple();
        let id2 = uuid_simple();

        // Every generated ID, not just the first, must be a valid hex string.
        assert_hex_id(&id1);
        assert_hex_id(&id2);
    }

    #[test]
    fn test_uuid_simple_format() {
        let id = uuid_simple();

        // Should be a valid hex string (non-empty, all hex chars)
        assert_hex_id(&id);
    }

    #[test]
    fn test_entity_metadata_new() {
        let metadata = EntityMetadata::new();
        assert!(metadata.properties.is_empty());
        assert!(metadata.confidence.is_none());
        assert!(metadata.schema.is_none());
    }

    #[test]
    fn test_entity_metadata_with_property() {
        let metadata = EntityMetadata::new()
            .with_property("key1", "value1")
            .with_property("key2", "value2");

        assert_eq!(metadata.properties.len(), 2);
        assert_eq!(metadata.properties.get("key1"), Some(&"value1".to_string()));
        assert_eq!(metadata.properties.get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_entity_metadata_with_confidence() {
        let metadata = EntityMetadata::new().with_confidence(0.95);

        assert_eq!(metadata.confidence, Some(0.95));
    }

    #[test]
    fn test_entity_metadata_with_schema() {
        let metadata = EntityMetadata::new().with_schema("https://schema.org/Person");

        assert_eq!(
            metadata.schema,
            Some("https://schema.org/Person".to_string())
        );
    }

    #[test]
    fn test_entity_metadata_chaining() {
        let metadata = EntityMetadata::new()
            .with_property("name", "Test Entity")
            .with_confidence(0.85)
            .with_schema("https://example.com/schema");

        assert_eq!(
            metadata.properties.get("name"),
            Some(&"Test Entity".to_string())
        );
        assert_eq!(metadata.confidence, Some(0.85));
        assert_eq!(
            metadata.schema,
            Some("https://example.com/schema".to_string())
        );
    }

    #[test]
    fn test_entity_type_variants() {
        // Constructing each variant proves it exists; matching on it gives
        // the test a real assertion instead of a constant `true`.
        let text = EntityType::Text;
        let image = EntityType::Image;
        let table = EntityType::Table;

        assert!(matches!(text, EntityType::Text));
        assert!(matches!(image, EntityType::Image));
        assert!(matches!(table, EntityType::Table));
    }

    #[test]
    fn test_entity_creation() {
        let entity = Entity {
            id: "test_entity_1".to_string(),
            entity_type: EntityType::Text,
            bounds: (10.0, 20.0, 100.0, 50.0),
            page: 1,
            metadata: EntityMetadata::new().with_confidence(0.9),
        };

        assert_eq!(entity.id, "test_entity_1");
        assert!(matches!(entity.entity_type, EntityType::Text));
        assert_eq!(entity.bounds, (10.0, 20.0, 100.0, 50.0));
        assert_eq!(entity.page, 1);
        assert_eq!(entity.metadata.confidence, Some(0.9));
    }
}