/// A single word produced by an OCR pass.
#[derive(Debug, Clone, PartialEq)]
pub struct OcrWord {
    /// Recognized text of the word.
    pub text: String,
    /// Bounding box of the word, in pixels.
    ///
    /// NOTE(review): the component order (for example `[x0, y0, x1, y1]`
    /// versus `[x, y, width, height]`) is not established in this file;
    /// confirm against the engine implementations.
    pub bbox_px: [u32; 4],
    /// Confidence score reported for this word.
    ///
    /// NOTE(review): presumably in `0.0..=1.0`; the range is not enforced here.
    pub confidence: f32,
}

17#[derive(Debug, Clone)]
19pub struct OcrPageResult {
20 pub words: Vec<OcrWord>,
22 pub confidence: f32,
24 pub image_width: u32,
26 pub image_height: u32,
28}
29
30impl OcrPageResult {
31 pub fn full_text(&self) -> String {
33 self.words
34 .iter()
35 .map(|w| w.text.as_str())
36 .collect::<Vec<_>>()
37 .join(" ")
38 }
39}
40
41pub trait OcrEngine: Send + Sync {
45 fn recognize(
53 &self,
54 image_data: &[u8],
55 width: u32,
56 height: u32,
57 dpi: u32,
58 ) -> std::result::Result<OcrPageResult, String>;
59
60 fn supported_languages(&self) -> Vec<String>;
62}
63
/// A stateless engine that performs no recognition.
///
/// Its `OcrEngine` implementation always returns a page with no words and
/// reports no supported languages.
#[derive(Debug, Default)]
pub struct NoOpEngine;

70impl OcrEngine for NoOpEngine {
71 fn recognize(
72 &self,
73 _image_data: &[u8],
74 width: u32,
75 height: u32,
76 _dpi: u32,
77 ) -> std::result::Result<OcrPageResult, String> {
78 Ok(OcrPageResult {
79 words: Vec::new(),
80 confidence: 0.0,
81 image_width: width,
82 image_height: height,
83 })
84 }
85
86 fn supported_languages(&self) -> Vec<String> {
87 Vec::new()
88 }
89}
90
#[cfg(test)]
mod tests {
    use super::*;

    /// The no-op engine yields an empty page that echoes the given dimensions.
    #[test]
    fn noop_engine_returns_empty() {
        let engine = NoOpEngine;
        let result = engine.recognize(&[], 100, 100, 300).unwrap();
        assert!(result.words.is_empty());
        assert_eq!(result.confidence, 0.0);
        assert_eq!(result.image_width, 100);
        assert_eq!(result.image_height, 100);
        assert!(engine.supported_languages().is_empty());
    }

    /// `full_text` joins word texts in order with single spaces.
    #[test]
    fn ocr_page_result_full_text() {
        let result = OcrPageResult {
            words: vec![
                OcrWord {
                    text: "Hello".to_string(),
                    bbox_px: [0, 0, 50, 20],
                    confidence: 0.95,
                },
                OcrWord {
                    text: "World".to_string(),
                    bbox_px: [60, 0, 110, 20],
                    confidence: 0.90,
                },
            ],
            confidence: 0.92,
            image_width: 200,
            image_height: 100,
        };
        assert_eq!(result.full_text(), "Hello World");
    }

    /// A page without words produces an empty string.
    #[test]
    fn ocr_page_result_full_text_empty() {
        let result = OcrPageResult {
            words: Vec::new(),
            confidence: 0.0,
            image_width: 0,
            image_height: 0,
        };
        assert_eq!(result.full_text(), "");
    }
}