1use serde::{Deserialize, Serialize};
4
/// Output of an OCR run: the recognized content plus metadata about the run.
///
/// The type derives `Serialize`/`Deserialize`, so it can be round-tripped
/// through any serde data format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcrResult {
    /// Recognized text. `OcrResult::from_text` stores its argument here
    /// unchanged.
    pub text: String,

    /// Markdown form of the content. `OcrResult::from_text` fills this with a
    /// copy of `text`; how other producers format it is not visible here.
    pub markdown: String,

    /// Individual text blocks. Left empty by both `OcrResult::empty` and
    /// `OcrResult::from_text`.
    pub blocks: Vec<TextBlock>,

    /// Metadata describing the run that produced this result.
    pub metadata: OcrMetadata,
}
21
22impl OcrResult {
23 pub fn empty() -> Self {
25 Self {
26 text: String::new(),
27 markdown: String::new(),
28 blocks: Vec::new(),
29 metadata: OcrMetadata::default(),
30 }
31 }
32
33 pub fn from_text(text: impl Into<String>) -> Self {
35 let text = text.into();
36 Self {
37 markdown: text.clone(),
38 text,
39 blocks: Vec::new(),
40 metadata: OcrMetadata::default(),
41 }
42 }
43}
44
/// A single block of recognized text together with its layout attributes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextBlock {
    /// Text content of the block.
    pub text: String,

    /// Bounding box as four `f32` values. `TextBlock::new` defaults this to
    /// `[0.0, 0.0, 1.0, 1.0]`.
    // NOTE(review): the default suggests normalized coordinates, but whether
    // the layout is [x0, y0, x1, y1] or [x, y, w, h] is not shown — confirm.
    pub bbox: [f32; 4],

    /// Confidence score for the block. `TextBlock::new` defaults this to `1.0`.
    // NOTE(review): presumably in the range 0.0..=1.0; nothing here enforces it.
    pub confidence: f32,

    /// Role of the block (title, table, ...); see `BlockRole`.
    pub role: BlockRole,

    /// Ordering index of the block. `TextBlock::new` defaults this to `0`.
    // NOTE(review): presumably the reading order within a page — confirm.
    pub order: usize,
}
64
65impl TextBlock {
66 pub fn new(text: impl Into<String>) -> Self {
68 Self {
69 text: text.into(),
70 bbox: [0.0, 0.0, 1.0, 1.0],
71 confidence: 1.0,
72 role: BlockRole::Text,
73 order: 0,
74 }
75 }
76
77 pub fn with_bbox(mut self, bbox: [f32; 4]) -> Self {
79 self.bbox = bbox;
80 self
81 }
82
83 pub fn with_confidence(mut self, confidence: f32) -> Self {
85 self.confidence = confidence;
86 self
87 }
88
89 pub fn with_role(mut self, role: BlockRole) -> Self {
91 self.role = role;
92 self
93 }
94
95 pub fn with_order(mut self, order: usize) -> Self {
97 self.order = order;
98 self
99 }
100}
101
/// Role assigned to a `TextBlock`.
///
/// Serialized with snake_case variant names (e.g. `PageNumber` becomes
/// `page_number`). The `Default` value is `BlockRole::Text`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum BlockRole {
    /// Title; `markdown_prefix` renders it with `"# "`.
    Title,
    /// Header; `markdown_prefix` renders it with `"## "`.
    Header,
    /// Ordinary text; the default role, with no Markdown prefix or suffix.
    #[default]
    Text,
    /// Table; no Markdown prefix or suffix.
    Table,
    /// List; `markdown_prefix` renders it with `"- "`.
    List,
    /// Caption; no Markdown prefix or suffix.
    Caption,
    /// Footer; no Markdown prefix or suffix.
    Footer,
    /// Page number; no Markdown prefix or suffix.
    PageNumber,
    /// Code; wrapped in a triple-backtick fence by `markdown_prefix` and
    /// `markdown_suffix`.
    Code,
    /// Quote; `markdown_prefix` renders it with `"> "`.
    Quote,
    /// Any other role; no Markdown prefix or suffix.
    Other,
}
130
131impl BlockRole {
132 pub fn markdown_prefix(&self) -> &'static str {
134 match self {
135 BlockRole::Title => "# ",
136 BlockRole::Header => "## ",
137 BlockRole::List => "- ",
138 BlockRole::Quote => "> ",
139 BlockRole::Code => "```\n",
140 _ => "",
141 }
142 }
143
144 pub fn markdown_suffix(&self) -> &'static str {
146 match self {
147 BlockRole::Code => "\n```",
148 _ => "",
149 }
150 }
151}
152
/// Metadata attached to an `OcrResult`.
///
/// The derived `Default` yields empty strings and vectors, `None` for the
/// optional fields, and `0` for the numeric fields.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct OcrMetadata {
    /// Name of the provider that produced the result.
    pub provider: String,

    /// Model identifier, when one is known.
    pub model: Option<String>,

    /// Processing time in milliseconds.
    pub processing_time_ms: u64,

    /// Size of the source image, when known.
    // NOTE(review): presumably (width, height) in pixels — confirm with producers.
    pub image_size: Option<(u32, u32)>,

    /// Languages associated with the result.
    // NOTE(review): the code format (e.g. ISO 639) is not shown here — confirm.
    pub languages: Vec<String>,

    /// Number of pages in the source document.
    pub page_count: u32,

    /// Page this result refers to.
    // NOTE(review): whether this is 0- or 1-based is not visible — confirm.
    pub current_page: u32,
}
177
/// Source of image data; `ImageInput::to_bytes` resolves it to raw bytes.
#[derive(Debug, Clone)]
pub enum ImageInput {
    /// Raw bytes already in memory; `to_bytes` returns a clone of them.
    Bytes(Vec<u8>),

    /// Base64-encoded data; `to_bytes` decodes it with the `base64` crate's
    /// `STANDARD` engine.
    Base64(String),

    /// Filesystem path; `to_bytes` reads the file via `tokio::fs::read`.
    Path(String),

    /// URL of the image. `to_bytes` does not fetch it and instead returns
    /// `VisionInputError::UrlNotSupported`.
    Url(String),
}
193
194impl ImageInput {
195 pub async fn to_bytes(&self) -> Result<Vec<u8>, VisionInputError> {
197 match self {
198 ImageInput::Bytes(bytes) => Ok(bytes.clone()),
199 ImageInput::Base64(encoded) => {
200 use base64::Engine;
201 base64::engine::general_purpose::STANDARD
202 .decode(encoded)
203 .map_err(|e| VisionInputError::Base64Decode(e.to_string()))
204 }
205 ImageInput::Path(path) => tokio::fs::read(path)
206 .await
207 .map_err(|e| VisionInputError::FileRead(e.to_string())),
208 ImageInput::Url(_url) => {
209 Err(VisionInputError::UrlNotSupported)
211 }
212 }
213 }
214}
215
/// Errors returned by `ImageInput::to_bytes`.
#[derive(Debug, thiserror::Error)]
pub enum VisionInputError {
    /// The base64 payload could not be decoded; carries the decoder's error
    /// message as a string.
    #[error("Failed to decode base64: {0}")]
    Base64Decode(String),

    /// The file could not be read; carries the I/O error message as a string.
    #[error("Failed to read file: {0}")]
    FileRead(String),

    /// A `Url` input was supplied; `to_bytes` does not fetch URLs.
    #[error("URL input is not supported in this build")]
    UrlNotSupported,
}
228
/// Output format selector.
///
/// Serialized with snake_case variant names (`text`, `markdown`, `json`,
/// `all`). The `Default` value is `OutputFormat::Markdown`.
// NOTE(review): nothing in this file consumes the enum, so the per-variant
// notes below are inferred from the names — confirm against the call sites.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum OutputFormat {
    /// Plain text (presumably `OcrResult::text`).
    Text,
    /// Markdown (presumably `OcrResult::markdown`); the default.
    #[default]
    Markdown,
    /// JSON (presumably the serialized result).
    Json,
    /// Presumably every format at once.
    All,
}
243
#[cfg(test)]
mod tests {
    use super::*;

    // `from_text` must mirror the text into `markdown` and leave `blocks` empty.
    #[test]
    fn test_ocr_result_from_text() {
        let result = OcrResult::from_text("Hello, World!");
        assert_eq!(result.text, "Hello, World!");
        assert_eq!(result.markdown, "Hello, World!");
        assert!(result.blocks.is_empty());
    }

    // `empty` must produce no content at all.
    #[test]
    fn test_ocr_result_empty() {
        let result = OcrResult::empty();
        assert!(result.text.is_empty());
        assert!(result.markdown.is_empty());
        assert!(result.blocks.is_empty());
        assert!(result.metadata.provider.is_empty());
        assert_eq!(result.metadata.page_count, 0);
    }

    // `new` must apply the documented defaults before any builder call.
    #[test]
    fn test_text_block_defaults() {
        let block = TextBlock::new("Test");
        assert_eq!(block.text, "Test");
        assert_eq!(block.bbox, [0.0, 0.0, 1.0, 1.0]);
        assert_eq!(block.confidence, 1.0);
        assert_eq!(block.role, BlockRole::Text);
        assert_eq!(block.order, 0);
    }

    #[test]
    fn test_text_block_builder() {
        let block = TextBlock::new("Test")
            .with_bbox([0.1, 0.2, 0.3, 0.4])
            .with_confidence(0.95)
            .with_role(BlockRole::Header)
            .with_order(1);

        assert_eq!(block.text, "Test");
        assert_eq!(block.bbox, [0.1, 0.2, 0.3, 0.4]);
        assert_eq!(block.confidence, 0.95);
        assert_eq!(block.role, BlockRole::Header);
        assert_eq!(block.order, 1);
    }

    #[test]
    fn test_block_role_markdown() {
        assert_eq!(BlockRole::Title.markdown_prefix(), "# ");
        assert_eq!(BlockRole::Header.markdown_prefix(), "## ");
        assert_eq!(BlockRole::List.markdown_prefix(), "- ");
        assert_eq!(BlockRole::Quote.markdown_prefix(), "> ");
        assert_eq!(BlockRole::Code.markdown_prefix(), "```\n");
        assert_eq!(BlockRole::Text.markdown_prefix(), "");
    }

    // Only code blocks are closed with a fence; everything else has no suffix.
    #[test]
    fn test_block_role_markdown_suffix() {
        assert_eq!(BlockRole::Code.markdown_suffix(), "\n```");
        assert_eq!(BlockRole::Text.markdown_suffix(), "");
        assert_eq!(BlockRole::Title.markdown_suffix(), "");
    }

    #[test]
    fn test_enum_defaults() {
        assert_eq!(BlockRole::default(), BlockRole::Text);
        assert_eq!(OutputFormat::default(), OutputFormat::Markdown);
    }

    #[test]
    fn test_ocr_result_serialization() {
        let result = OcrResult::from_text("Test");
        let json = serde_json::to_string(&result).unwrap();
        let deserialized: OcrResult = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.text, result.text);
        assert_eq!(deserialized.markdown, result.markdown);
        assert_eq!(deserialized.blocks.len(), result.blocks.len());
    }
}