1use crate::graphics::ImageFormat;
72use crate::operations::page_analysis::ContentAnalysis;
73use std::fmt;
74
/// Convenience alias for fallible operations in the OCR subsystem.
pub type OcrResult<T> = Result<T, OcrError>;
77
/// Errors that can occur while configuring or running OCR.
///
/// Display text comes from the `thiserror` `#[error]` attributes; `Io`
/// additionally converts from `std::io::Error` via `#[from]`.
#[derive(Debug, thiserror::Error)]
pub enum OcrError {
    /// The requested OCR backend is not installed, configured, or reachable.
    #[error("OCR provider not available: {0}")]
    ProviderNotAvailable(String),

    /// The image format is recognized but not accepted by this provider.
    #[error("Unsupported image format: {0:?}")]
    UnsupportedImageFormat(ImageFormat),

    /// The input bytes do not look like a valid image (too short, bad magic).
    #[error("Invalid image data: {0}")]
    InvalidImageData(String),

    /// The provider accepted the input but failed while recognizing text.
    #[error("OCR processing failed: {0}")]
    ProcessingFailed(String),

    /// A network-level failure talking to a remote OCR service.
    #[error("Network error: {0}")]
    NetworkError(String),

    /// Credentials were rejected by a remote OCR service.
    #[error("Authentication error: {0}")]
    AuthenticationError(String),

    /// A remote OCR service throttled the request.
    #[error("Rate limit exceeded: {0}")]
    RateLimitExceeded(String),

    /// Recognition succeeded but the results fell below a confidence bar.
    #[error("Low confidence results: {0}")]
    LowConfidence(String),

    /// Underlying I/O failure (file reads, etc.).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Invalid or inconsistent provider configuration.
    #[error("Configuration error: {0}")]
    Configuration(String),
}
121
/// Per-request tuning options handed to an [`OcrProvider`].
#[derive(Debug, Clone)]
pub struct OcrOptions {
    /// Recognition language code (default "en"; exact code scheme depends on
    /// the backend — TODO confirm whether ISO 639-1 is assumed everywhere).
    pub language: String,

    /// Minimum acceptable confidence on a 0.0–1.0 scale (defaults to 0.6).
    pub min_confidence: f64,

    /// Whether to preserve positional layout information in the output.
    pub preserve_layout: bool,

    /// Image clean-up steps applied before recognition.
    pub preprocessing: ImagePreprocessing,

    /// Backend-specific key/value settings passed through verbatim.
    pub engine_options: std::collections::HashMap<String, String>,

    /// Upper bound on processing time, in seconds (defaults to 30).
    pub timeout_seconds: u32,
}
143
144impl Default for OcrOptions {
145 fn default() -> Self {
146 Self {
147 language: "en".to_string(),
148 min_confidence: 0.6,
149 preserve_layout: true,
150 preprocessing: ImagePreprocessing::default(),
151 engine_options: std::collections::HashMap::new(),
152 timeout_seconds: 30,
153 }
154 }
155}
156
/// Image clean-up steps to run before text recognition.
#[derive(Debug, Clone)]
pub struct ImagePreprocessing {
    /// Remove speckle noise.
    pub denoise: bool,

    /// Straighten a rotated/skewed scan.
    pub deskew: bool,

    /// Boost contrast to make glyphs stand out.
    pub enhance_contrast: bool,

    /// Apply a sharpening filter (off by default).
    pub sharpen: bool,

    /// Uniform scaling applied to the image; 1.0 means no resizing.
    pub scale_factor: f64,
}
175
176impl Default for ImagePreprocessing {
177 fn default() -> Self {
178 Self {
179 denoise: true,
180 deskew: true,
181 enhance_contrast: true,
182 sharpen: false,
183 scale_factor: 1.0,
184 }
185 }
186}
187
/// One piece of recognized text with its position and confidence.
#[derive(Debug, Clone)]
pub struct OcrTextFragment {
    /// The recognized text content.
    pub text: String,

    // NOTE(review): coordinates appear to be page points with larger y toward
    // the top of the page (mock fragments use y = 700/680) — confirm origin.
    /// Horizontal position of the fragment.
    pub x: f64,

    /// Vertical position of the fragment.
    pub y: f64,

    /// Width of the fragment's bounding box.
    pub width: f64,

    /// Height of the fragment's bounding box.
    pub height: f64,

    /// Recognition confidence on a 0.0–1.0 scale.
    pub confidence: f64,

    /// Estimated font size of the recognized text.
    pub font_size: f64,

    /// Granularity of this fragment (character/word/line/paragraph).
    pub fragment_type: FragmentType,
}
215
/// Granularity level of an [`OcrTextFragment`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FragmentType {
    /// A single character.
    Character,
    /// A single word.
    Word,
    /// A full line of text.
    Line,
    /// A whole paragraph.
    Paragraph,
}
228
/// Complete output of one OCR run over a single image.
#[derive(Debug, Clone)]
pub struct OcrProcessingResult {
    /// All recognized text joined together.
    pub text: String,

    /// Overall confidence reported by the engine (0.0–1.0).
    pub confidence: f64,

    /// Individual positioned fragments that make up `text`.
    pub fragments: Vec<OcrTextFragment>,

    /// Wall-clock processing time in milliseconds.
    pub processing_time_ms: u64,

    /// Name of the engine that produced this result.
    pub engine_name: String,

    /// Language the recognition ran with.
    pub language: String,

    /// Source image size as (width, height) in pixels.
    pub image_dimensions: (u32, u32),
}
253
254impl OcrProcessingResult {
255 pub fn filter_by_confidence(&self, min_confidence: f64) -> Vec<&OcrTextFragment> {
257 self.fragments
258 .iter()
259 .filter(|fragment| fragment.confidence >= min_confidence)
260 .collect()
261 }
262
263 pub fn fragments_in_region(
265 &self,
266 x: f64,
267 y: f64,
268 width: f64,
269 height: f64,
270 ) -> Vec<&OcrTextFragment> {
271 self.fragments
272 .iter()
273 .filter(|fragment| {
274 fragment.x >= x
275 && fragment.y >= y
276 && fragment.x + fragment.width <= x + width
277 && fragment.y + fragment.height <= y + height
278 })
279 .collect()
280 }
281
282 pub fn fragments_of_type(&self, fragment_type: FragmentType) -> Vec<&OcrTextFragment> {
284 self.fragments
285 .iter()
286 .filter(|fragment| fragment.fragment_type == fragment_type)
287 .collect()
288 }
289
290 pub fn average_confidence(&self) -> f64 {
292 if self.fragments.is_empty() {
293 return 0.0;
294 }
295
296 let sum: f64 = self.fragments.iter().map(|f| f.confidence).sum();
297 sum / self.fragments.len() as f64
298 }
299}
300
/// The OCR backends this module knows about.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OcrEngine {
    /// In-process mock implementation for tests.
    Mock,
    /// Local Tesseract engine.
    Tesseract,
    /// Azure Computer Vision service.
    Azure,
    /// AWS Textract service.
    Aws,
    /// Google Cloud Vision service.
    GoogleCloud,
}
315
316impl OcrEngine {
317 pub fn name(&self) -> &'static str {
319 match self {
320 OcrEngine::Mock => "Mock OCR",
321 OcrEngine::Tesseract => "Tesseract",
322 OcrEngine::Azure => "Azure Computer Vision",
323 OcrEngine::Aws => "AWS Textract",
324 OcrEngine::GoogleCloud => "Google Cloud Vision",
325 }
326 }
327
328 pub fn supports_format(&self, format: ImageFormat) -> bool {
330 match self {
331 OcrEngine::Mock => true, OcrEngine::Tesseract => matches!(
333 format,
334 ImageFormat::Jpeg | ImageFormat::Png | ImageFormat::Tiff
335 ),
336 OcrEngine::Azure => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
337 OcrEngine::Aws => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
338 OcrEngine::GoogleCloud => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
339 }
340 }
341}
342
343impl fmt::Display for OcrEngine {
344 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345 write!(f, "{}", self.name())
346 }
347}
348
349pub trait OcrProvider: Send + Sync {
397 fn process_image(
420 &self,
421 image_data: &[u8],
422 options: &OcrOptions,
423 ) -> OcrResult<OcrProcessingResult>;
424
425 fn process_page(
445 &self,
446 _page_analysis: &ContentAnalysis,
447 page_data: &[u8],
448 options: &OcrOptions,
449 ) -> OcrResult<OcrProcessingResult> {
450 self.process_image(page_data, options)
451 }
452
453 fn supported_formats(&self) -> Vec<ImageFormat>;
459
460 fn engine_name(&self) -> &str;
466
467 fn engine_type(&self) -> OcrEngine;
473
474 fn supports_format(&self, format: ImageFormat) -> bool {
484 self.supported_formats().contains(&format)
485 }
486
487 fn validate_image_data(&self, image_data: &[u8]) -> OcrResult<()> {
504 if image_data.len() < 8 {
505 return Err(OcrError::InvalidImageData(
506 "Image data too short".to_string(),
507 ));
508 }
509
510 let format = if image_data.starts_with(b"\xFF\xD8\xFF") {
512 ImageFormat::Jpeg
513 } else if image_data.starts_with(b"\x89PNG\r\n\x1a\n") {
514 ImageFormat::Png
515 } else if image_data.starts_with(b"II\x2A\x00") || image_data.starts_with(b"MM\x00\x2A") {
516 ImageFormat::Tiff
517 } else {
518 return Err(OcrError::InvalidImageData(
519 "Unrecognized image format".to_string(),
520 ));
521 };
522
523 if !self.supports_format(format) {
524 return Err(OcrError::UnsupportedImageFormat(format));
525 }
526
527 Ok(())
528 }
529}
530
/// Test double that fulfils the [`OcrProvider`] contract without doing any
/// real recognition: it returns configurable canned text and confidence.
#[derive(Clone)]
pub struct MockOcrProvider {
    // Confidence reported for the primary fragment (0.0–1.0).
    confidence: f64,
    // Canned text returned as the primary fragment.
    mock_text: String,
    // Simulated processing latency, in milliseconds.
    processing_delay_ms: u64,
}
557
558impl MockOcrProvider {
559 pub fn new() -> Self {
561 Self {
562 confidence: 0.85,
563 mock_text: "Mock OCR extracted text from scanned image".to_string(),
564 processing_delay_ms: 100,
565 }
566 }
567
568 pub fn with_text_and_confidence(text: String, confidence: f64) -> Self {
570 Self {
571 confidence,
572 mock_text: text,
573 processing_delay_ms: 100,
574 }
575 }
576
577 pub fn set_mock_text(&mut self, text: String) {
579 self.mock_text = text;
580 }
581
582 pub fn set_confidence(&mut self, confidence: f64) {
584 self.confidence = confidence.clamp(0.0, 1.0);
585 }
586
587 pub fn set_processing_delay(&mut self, delay_ms: u64) {
589 self.processing_delay_ms = delay_ms;
590 }
591}
592
593impl Default for MockOcrProvider {
594 fn default() -> Self {
595 Self::new()
596 }
597}
598
599impl OcrProvider for MockOcrProvider {
600 fn process_image(
601 &self,
602 image_data: &[u8],
603 options: &OcrOptions,
604 ) -> OcrResult<OcrProcessingResult> {
605 self.validate_image_data(image_data)?;
607
608 std::thread::sleep(std::time::Duration::from_millis(self.processing_delay_ms));
610
611 let fragments = vec![
613 OcrTextFragment {
614 text: self.mock_text.clone(),
615 x: 50.0,
616 y: 700.0,
617 width: 200.0,
618 height: 20.0,
619 confidence: self.confidence,
620 font_size: 12.0,
621 fragment_type: FragmentType::Line,
622 },
623 OcrTextFragment {
624 text: "Additional mock text".to_string(),
625 x: 50.0,
626 y: 680.0,
627 width: 150.0,
628 height: 20.0,
629 confidence: self.confidence * 0.9,
630 font_size: 12.0,
631 fragment_type: FragmentType::Line,
632 },
633 ];
634
635 Ok(OcrProcessingResult {
636 text: format!("{}\nAdditional mock text", self.mock_text),
637 confidence: self.confidence,
638 fragments,
639 processing_time_ms: self.processing_delay_ms,
640 engine_name: "Mock OCR".to_string(),
641 language: options.language.clone(),
642 image_dimensions: (800, 600), })
644 }
645
646 fn supported_formats(&self) -> Vec<ImageFormat> {
647 vec![ImageFormat::Jpeg, ImageFormat::Png, ImageFormat::Tiff]
648 }
649
650 fn engine_name(&self) -> &str {
651 "Mock OCR"
652 }
653
654 fn engine_type(&self) -> OcrEngine {
655 OcrEngine::Mock
656 }
657}
658
// Unit tests for the OCR types and the mock provider. All image inputs are
// hand-built byte vectors carrying real magic numbers (JPEG SOI, etc.), so no
// fixture files are needed.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ocr_options_default() {
        let options = OcrOptions::default();
        assert_eq!(options.language, "en");
        assert_eq!(options.min_confidence, 0.6);
        assert!(options.preserve_layout);
        assert_eq!(options.timeout_seconds, 30);
    }

    #[test]
    fn test_image_preprocessing_default() {
        let preprocessing = ImagePreprocessing::default();
        assert!(preprocessing.denoise);
        assert!(preprocessing.deskew);
        assert!(preprocessing.enhance_contrast);
        assert!(!preprocessing.sharpen);
        assert_eq!(preprocessing.scale_factor, 1.0);
    }

    #[test]
    fn test_ocr_engine_name() {
        assert_eq!(OcrEngine::Mock.name(), "Mock OCR");
        assert_eq!(OcrEngine::Tesseract.name(), "Tesseract");
        assert_eq!(OcrEngine::Azure.name(), "Azure Computer Vision");
    }

    #[test]
    fn test_ocr_engine_supports_format() {
        assert!(OcrEngine::Mock.supports_format(ImageFormat::Jpeg));
        assert!(OcrEngine::Mock.supports_format(ImageFormat::Png));
        assert!(OcrEngine::Mock.supports_format(ImageFormat::Tiff));

        assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Jpeg));
        assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Png));
        assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Tiff));

        assert!(OcrEngine::Azure.supports_format(ImageFormat::Jpeg));
        assert!(OcrEngine::Azure.supports_format(ImageFormat::Png));
        assert!(!OcrEngine::Azure.supports_format(ImageFormat::Tiff));
    }

    #[test]
    fn test_fragment_type_equality() {
        assert_eq!(FragmentType::Word, FragmentType::Word);
        assert_ne!(FragmentType::Word, FragmentType::Line);
        assert_ne!(FragmentType::Character, FragmentType::Paragraph);
    }

    #[test]
    fn test_mock_ocr_provider_creation() {
        let provider = MockOcrProvider::new();
        assert_eq!(provider.confidence, 0.85);
        assert!(provider.mock_text.contains("Mock OCR"));
        assert_eq!(provider.processing_delay_ms, 100);
    }

    #[test]
    fn test_mock_ocr_provider_with_custom_text() {
        let custom_text = "Custom mock text".to_string();
        let provider = MockOcrProvider::with_text_and_confidence(custom_text.clone(), 0.95);
        assert_eq!(provider.mock_text, custom_text);
        assert_eq!(provider.confidence, 0.95);
    }

    #[test]
    fn test_mock_ocr_provider_process_image() {
        let provider = MockOcrProvider::new();
        let options = OcrOptions::default();

        // Minimal JPEG header: SOI marker followed by a JFIF APP0 prefix.
        let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];

        let result = provider.process_image(&jpeg_data, &options).unwrap();
        assert!(result.text.contains("Mock OCR"));
        assert_eq!(result.confidence, 0.85);
        assert!(!result.fragments.is_empty());
        assert_eq!(result.engine_name, "Mock OCR");
        assert_eq!(result.language, "en");
    }

    #[test]
    fn test_mock_ocr_provider_supported_formats() {
        let provider = MockOcrProvider::new();
        let formats = provider.supported_formats();
        assert!(formats.contains(&ImageFormat::Jpeg));
        assert!(formats.contains(&ImageFormat::Png));
        assert!(formats.contains(&ImageFormat::Tiff));
    }

    #[test]
    fn test_mock_ocr_provider_engine_info() {
        let provider = MockOcrProvider::new();
        assert_eq!(provider.engine_name(), "Mock OCR");
        assert_eq!(provider.engine_type(), OcrEngine::Mock);
    }

    #[test]
    fn test_mock_ocr_provider_supports_format() {
        let provider = MockOcrProvider::new();
        assert!(provider.supports_format(ImageFormat::Jpeg));
        assert!(provider.supports_format(ImageFormat::Png));
        assert!(provider.supports_format(ImageFormat::Tiff));
    }

    #[test]
    fn test_mock_ocr_provider_validate_image_data() {
        let provider = MockOcrProvider::new();

        // Valid JPEG magic.
        let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
        assert!(provider.validate_image_data(&jpeg_data).is_ok());

        // Below the 8-byte minimum.
        let short_data = vec![0xFF, 0xD8];
        assert!(provider.validate_image_data(&short_data).is_err());

        // Long enough but matches no known magic.
        let invalid_data = vec![0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09];
        assert!(provider.validate_image_data(&invalid_data).is_err());
    }

    #[test]
    fn test_ocr_processing_result_filter_by_confidence() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![
                OcrTextFragment {
                    text: "High confidence".to_string(),
                    x: 0.0,
                    y: 0.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Low confidence".to_string(),
                    x: 0.0,
                    y: 20.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.5,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
            ],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        // 0.9 passes the 0.8 threshold; 0.5 does not.
        let high_confidence = result.filter_by_confidence(0.8);
        assert_eq!(high_confidence.len(), 1);
        assert_eq!(high_confidence[0].text, "High confidence");
    }

    #[test]
    fn test_ocr_processing_result_fragments_in_region() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![
                OcrTextFragment {
                    text: "Inside region".to_string(),
                    x: 10.0,
                    y: 10.0,
                    width: 80.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Outside region".to_string(),
                    x: 200.0,
                    y: 200.0,
                    width: 80.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
            ],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        // Only the fragment fully contained in the 100x100 box qualifies.
        let in_region = result.fragments_in_region(0.0, 0.0, 100.0, 100.0);
        assert_eq!(in_region.len(), 1);
        assert_eq!(in_region[0].text, "Inside region");
    }

    #[test]
    fn test_ocr_processing_result_fragments_of_type() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![
                OcrTextFragment {
                    text: "Word fragment".to_string(),
                    x: 0.0,
                    y: 0.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Line fragment".to_string(),
                    x: 0.0,
                    y: 20.0,
                    width: 200.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Line,
                },
            ],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        let words = result.fragments_of_type(FragmentType::Word);
        assert_eq!(words.len(), 1);
        assert_eq!(words[0].text, "Word fragment");

        let lines = result.fragments_of_type(FragmentType::Line);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].text, "Line fragment");
    }

    #[test]
    fn test_ocr_processing_result_average_confidence() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![
                OcrTextFragment {
                    text: "Fragment 1".to_string(),
                    x: 0.0,
                    y: 0.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.8,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Fragment 2".to_string(),
                    x: 0.0,
                    y: 20.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.6,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
            ],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        // (0.8 + 0.6) / 2 == 0.7 exactly in f64.
        let avg_confidence = result.average_confidence();
        assert_eq!(avg_confidence, 0.7);
    }

    #[test]
    fn test_ocr_processing_result_average_confidence_empty() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        // Empty fragment list must not divide by zero.
        let avg_confidence = result.average_confidence();
        assert_eq!(avg_confidence, 0.0);
    }
}