1use crate::graphics::ImageFormat;
72use crate::operations::page_analysis::ContentAnalysis;
73use std::fmt;
74
75pub type OcrResult<T> = Result<T, OcrError>;
77
78#[derive(Debug, thiserror::Error)]
80pub enum OcrError {
81 #[error("OCR provider not available: {0}")]
83 ProviderNotAvailable(String),
84
85 #[error("Unsupported image format: {0:?}")]
87 UnsupportedImageFormat(ImageFormat),
88
89 #[error("Invalid image data: {0}")]
91 InvalidImageData(String),
92
93 #[error("OCR processing failed: {0}")]
95 ProcessingFailed(String),
96
97 #[error("Network error: {0}")]
99 NetworkError(String),
100
101 #[error("Authentication error: {0}")]
103 AuthenticationError(String),
104
105 #[error("Rate limit exceeded: {0}")]
107 RateLimitExceeded(String),
108
109 #[error("Low confidence results: {0}")]
111 LowConfidence(String),
112
113 #[error("IO error: {0}")]
115 Io(#[from] std::io::Error),
116
117 #[error("Configuration error: {0}")]
119 Configuration(String),
120}
121
122#[derive(Debug, Clone)]
124pub struct OcrOptions {
125 pub language: String,
127
128 pub min_confidence: f64,
130
131 pub preserve_layout: bool,
133
134 pub preprocessing: ImagePreprocessing,
136
137 pub engine_options: std::collections::HashMap<String, String>,
139
140 pub timeout_seconds: u32,
142}
143
144impl Default for OcrOptions {
145 fn default() -> Self {
146 Self {
147 language: "en".to_string(),
148 min_confidence: 0.6,
149 preserve_layout: true,
150 preprocessing: ImagePreprocessing::default(),
151 engine_options: std::collections::HashMap::new(),
152 timeout_seconds: 30,
153 }
154 }
155}
156
/// Switches describing the preprocessing requested before OCR.
///
/// This type only carries the settings; nothing in this file applies them.
#[derive(Debug, Clone)]
pub struct ImagePreprocessing {
    /// Request noise removal.
    pub denoise: bool,

    /// Request skew correction.
    pub deskew: bool,

    /// Request contrast enhancement.
    pub enhance_contrast: bool,

    /// Request sharpening.
    pub sharpen: bool,

    /// Scaling factor (the default is `1.0`).
    pub scale_factor: f64,
}
175
176impl Default for ImagePreprocessing {
177 fn default() -> Self {
178 Self {
179 denoise: true,
180 deskew: true,
181 enhance_contrast: true,
182 sharpen: false,
183 scale_factor: 1.0,
184 }
185 }
186}
187
188#[derive(Debug, Clone)]
190pub struct OcrTextFragment {
191 pub text: String,
193
194 pub x: f64,
196
197 pub y: f64,
199
200 pub width: f64,
202
203 pub height: f64,
205
206 pub confidence: f64,
208
209 pub font_size: f64,
211
212 pub fragment_type: FragmentType,
214}
215
/// Granularity of an [`OcrTextFragment`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FragmentType {
    /// A single character.
    Character,
    /// A single word.
    Word,
    /// A line of text.
    Line,
    /// A paragraph.
    Paragraph,
}
228
229#[derive(Debug, Clone)]
231pub struct OcrProcessingResult {
232 pub text: String,
234
235 pub confidence: f64,
237
238 pub fragments: Vec<OcrTextFragment>,
240
241 pub processing_time_ms: u64,
243
244 pub engine_name: String,
246
247 pub language: String,
249
250 pub image_dimensions: (u32, u32),
252}
253
254impl OcrProcessingResult {
255 pub fn filter_by_confidence(&self, min_confidence: f64) -> Vec<&OcrTextFragment> {
257 self.fragments
258 .iter()
259 .filter(|fragment| fragment.confidence >= min_confidence)
260 .collect()
261 }
262
263 pub fn fragments_in_region(
265 &self,
266 x: f64,
267 y: f64,
268 width: f64,
269 height: f64,
270 ) -> Vec<&OcrTextFragment> {
271 self.fragments
272 .iter()
273 .filter(|fragment| {
274 fragment.x >= x
275 && fragment.y >= y
276 && fragment.x + fragment.width <= x + width
277 && fragment.y + fragment.height <= y + height
278 })
279 .collect()
280 }
281
282 pub fn fragments_of_type(&self, fragment_type: FragmentType) -> Vec<&OcrTextFragment> {
284 self.fragments
285 .iter()
286 .filter(|fragment| fragment.fragment_type == fragment_type)
287 .collect()
288 }
289
290 pub fn average_confidence(&self) -> f64 {
292 if self.fragments.is_empty() {
293 return 0.0;
294 }
295
296 let sum: f64 = self.fragments.iter().map(|f| f.confidence).sum();
297 sum / self.fragments.len() as f64
298 }
299}
300
/// Identifies an OCR engine.
///
/// Human-readable names and the static format table live on the inherent
/// methods of this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OcrEngine {
    /// Mock engine.
    Mock,
    /// Tesseract.
    Tesseract,
    /// Azure Computer Vision.
    Azure,
    /// AWS Textract.
    Aws,
    /// Google Cloud Vision.
    GoogleCloud,
}
315
316impl OcrEngine {
317 pub fn name(&self) -> &'static str {
319 match self {
320 OcrEngine::Mock => "Mock OCR",
321 OcrEngine::Tesseract => "Tesseract",
322 OcrEngine::Azure => "Azure Computer Vision",
323 OcrEngine::Aws => "AWS Textract",
324 OcrEngine::GoogleCloud => "Google Cloud Vision",
325 }
326 }
327
328 pub fn supports_format(&self, format: ImageFormat) -> bool {
330 match self {
331 OcrEngine::Mock => true, OcrEngine::Tesseract => matches!(
333 format,
334 ImageFormat::Jpeg | ImageFormat::Png | ImageFormat::Tiff
335 ),
336 OcrEngine::Azure => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
337 OcrEngine::Aws => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
338 OcrEngine::GoogleCloud => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
339 }
340 }
341}
342
343impl fmt::Display for OcrEngine {
344 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345 write!(f, "{}", self.name())
346 }
347}
348
349pub trait OcrProvider: Send + Sync {
397 fn process_image(
420 &self,
421 image_data: &[u8],
422 options: &OcrOptions,
423 ) -> OcrResult<OcrProcessingResult>;
424
425 fn process_page(
445 &self,
446 _page_analysis: &ContentAnalysis,
447 page_data: &[u8],
448 options: &OcrOptions,
449 ) -> OcrResult<OcrProcessingResult> {
450 self.process_image(page_data, options)
451 }
452
453 fn supported_formats(&self) -> Vec<ImageFormat>;
459
460 fn engine_name(&self) -> &str;
466
467 fn engine_type(&self) -> OcrEngine;
473
474 fn supports_format(&self, format: ImageFormat) -> bool {
484 self.supported_formats().contains(&format)
485 }
486
487 fn validate_image_data(&self, image_data: &[u8]) -> OcrResult<()> {
504 if image_data.len() < 8 {
505 return Err(OcrError::InvalidImageData(
506 "Image data too short".to_string(),
507 ));
508 }
509
510 let format = if image_data.starts_with(b"\xFF\xD8\xFF") {
512 ImageFormat::Jpeg
513 } else if image_data.starts_with(b"\x89PNG\r\n\x1a\n") {
514 ImageFormat::Png
515 } else if image_data.starts_with(b"II\x2A\x00") || image_data.starts_with(b"MM\x00\x2A") {
516 ImageFormat::Tiff
517 } else {
518 return Err(OcrError::InvalidImageData(
519 "Unrecognized image format".to_string(),
520 ));
521 };
522
523 if !self.supports_format(format) {
524 return Err(OcrError::UnsupportedImageFormat(format));
525 }
526
527 Ok(())
528 }
529}
530
/// OCR provider that returns canned results instead of running a real engine.
///
/// `Debug` is derived so the provider can be logged and used in assertion
/// messages like the other public types in this module.
#[derive(Debug, Clone)]
pub struct MockOcrProvider {
    /// Confidence reported for the overall result and the first fragment.
    confidence: f64,
    /// Text returned as the first fragment.
    mock_text: String,
    /// How long `process_image` sleeps, in milliseconds.
    processing_delay_ms: u64,
}
557
558impl MockOcrProvider {
559 pub fn new() -> Self {
561 Self {
562 confidence: 0.85,
563 mock_text: "Mock OCR extracted text from scanned image".to_string(),
564 processing_delay_ms: 100,
565 }
566 }
567
568 pub fn with_text_and_confidence(text: String, confidence: f64) -> Self {
570 Self {
571 confidence,
572 mock_text: text,
573 processing_delay_ms: 100,
574 }
575 }
576
577 pub fn set_mock_text(&mut self, text: String) {
579 self.mock_text = text;
580 }
581
582 pub fn set_confidence(&mut self, confidence: f64) {
584 self.confidence = confidence.clamp(0.0, 1.0);
585 }
586
587 pub fn set_processing_delay(&mut self, delay_ms: u64) {
589 self.processing_delay_ms = delay_ms;
590 }
591}
592
593impl Default for MockOcrProvider {
594 fn default() -> Self {
595 Self::new()
596 }
597}
598
599impl OcrProvider for MockOcrProvider {
600 fn process_image(
601 &self,
602 image_data: &[u8],
603 options: &OcrOptions,
604 ) -> OcrResult<OcrProcessingResult> {
605 self.validate_image_data(image_data)?;
607
608 std::thread::sleep(std::time::Duration::from_millis(self.processing_delay_ms));
610
611 let fragments = vec![
613 OcrTextFragment {
614 text: self.mock_text.clone(),
615 x: 50.0,
616 y: 700.0,
617 width: 200.0,
618 height: 20.0,
619 confidence: self.confidence,
620 font_size: 12.0,
621 fragment_type: FragmentType::Line,
622 },
623 OcrTextFragment {
624 text: "Additional mock text".to_string(),
625 x: 50.0,
626 y: 680.0,
627 width: 150.0,
628 height: 20.0,
629 confidence: self.confidence * 0.9,
630 font_size: 12.0,
631 fragment_type: FragmentType::Line,
632 },
633 ];
634
635 Ok(OcrProcessingResult {
636 text: format!("{}\nAdditional mock text", self.mock_text),
637 confidence: self.confidence,
638 fragments,
639 processing_time_ms: self.processing_delay_ms,
640 engine_name: "Mock OCR".to_string(),
641 language: options.language.clone(),
642 image_dimensions: (800, 600), })
644 }
645
646 fn supported_formats(&self) -> Vec<ImageFormat> {
647 vec![ImageFormat::Jpeg, ImageFormat::Png, ImageFormat::Tiff]
648 }
649
650 fn engine_name(&self) -> &str {
651 "Mock OCR"
652 }
653
654 fn engine_type(&self) -> OcrEngine {
655 OcrEngine::Mock
656 }
657}
658
659#[cfg(test)]
660mod tests {
661 use super::*;
662
663 #[test]
664 fn test_ocr_options_default() {
665 let options = OcrOptions::default();
666 assert_eq!(options.language, "en");
667 assert_eq!(options.min_confidence, 0.6);
668 assert!(options.preserve_layout);
669 assert_eq!(options.timeout_seconds, 30);
670 }
671
672 #[test]
673 fn test_image_preprocessing_default() {
674 let preprocessing = ImagePreprocessing::default();
675 assert!(preprocessing.denoise);
676 assert!(preprocessing.deskew);
677 assert!(preprocessing.enhance_contrast);
678 assert!(!preprocessing.sharpen);
679 assert_eq!(preprocessing.scale_factor, 1.0);
680 }
681
682 #[test]
683 fn test_ocr_engine_name() {
684 assert_eq!(OcrEngine::Mock.name(), "Mock OCR");
685 assert_eq!(OcrEngine::Tesseract.name(), "Tesseract");
686 assert_eq!(OcrEngine::Azure.name(), "Azure Computer Vision");
687 }
688
689 #[test]
690 fn test_ocr_engine_supports_format() {
691 assert!(OcrEngine::Mock.supports_format(ImageFormat::Jpeg));
692 assert!(OcrEngine::Mock.supports_format(ImageFormat::Png));
693 assert!(OcrEngine::Mock.supports_format(ImageFormat::Tiff));
694
695 assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Jpeg));
696 assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Png));
697 assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Tiff));
698
699 assert!(OcrEngine::Azure.supports_format(ImageFormat::Jpeg));
700 assert!(OcrEngine::Azure.supports_format(ImageFormat::Png));
701 assert!(!OcrEngine::Azure.supports_format(ImageFormat::Tiff));
702 }
703
704 #[test]
705 fn test_fragment_type_equality() {
706 assert_eq!(FragmentType::Word, FragmentType::Word);
707 assert_ne!(FragmentType::Word, FragmentType::Line);
708 assert_ne!(FragmentType::Character, FragmentType::Paragraph);
709 }
710
711 #[test]
712 fn test_mock_ocr_provider_creation() {
713 let provider = MockOcrProvider::new();
714 assert_eq!(provider.confidence, 0.85);
715 assert!(provider.mock_text.contains("Mock OCR"));
716 assert_eq!(provider.processing_delay_ms, 100);
717 }
718
719 #[test]
720 fn test_mock_ocr_provider_with_custom_text() {
721 let custom_text = "Custom mock text".to_string();
722 let provider = MockOcrProvider::with_text_and_confidence(custom_text.clone(), 0.95);
723 assert_eq!(provider.mock_text, custom_text);
724 assert_eq!(provider.confidence, 0.95);
725 }
726
727 #[test]
728 fn test_mock_ocr_provider_process_image() {
729 let provider = MockOcrProvider::new();
730 let options = OcrOptions::default();
731
732 let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
734
735 let result = provider.process_image(&jpeg_data, &options).unwrap();
736 assert!(result.text.contains("Mock OCR"));
737 assert_eq!(result.confidence, 0.85);
738 assert!(!result.fragments.is_empty());
739 assert_eq!(result.engine_name, "Mock OCR");
740 assert_eq!(result.language, "en");
741 }
742
743 #[test]
744 fn test_mock_ocr_provider_supported_formats() {
745 let provider = MockOcrProvider::new();
746 let formats = provider.supported_formats();
747 assert!(formats.contains(&ImageFormat::Jpeg));
748 assert!(formats.contains(&ImageFormat::Png));
749 assert!(formats.contains(&ImageFormat::Tiff));
750 }
751
752 #[test]
753 fn test_mock_ocr_provider_engine_info() {
754 let provider = MockOcrProvider::new();
755 assert_eq!(provider.engine_name(), "Mock OCR");
756 assert_eq!(provider.engine_type(), OcrEngine::Mock);
757 }
758
759 #[test]
760 fn test_mock_ocr_provider_supports_format() {
761 let provider = MockOcrProvider::new();
762 assert!(provider.supports_format(ImageFormat::Jpeg));
763 assert!(provider.supports_format(ImageFormat::Png));
764 assert!(provider.supports_format(ImageFormat::Tiff));
765 }
766
767 #[test]
768 fn test_mock_ocr_provider_validate_image_data() {
769 let provider = MockOcrProvider::new();
770
771 let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
773 assert!(provider.validate_image_data(&jpeg_data).is_ok());
774
775 let short_data = vec![0xFF, 0xD8];
777 assert!(provider.validate_image_data(&short_data).is_err());
778
779 let invalid_data = vec![0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09];
781 assert!(provider.validate_image_data(&invalid_data).is_err());
782 }
783
784 #[test]
785 fn test_ocr_processing_result_filter_by_confidence() {
786 let result = OcrProcessingResult {
787 text: "Test text".to_string(),
788 confidence: 0.8,
789 fragments: vec![
790 OcrTextFragment {
791 text: "High confidence".to_string(),
792 x: 0.0,
793 y: 0.0,
794 width: 100.0,
795 height: 20.0,
796 confidence: 0.9,
797 font_size: 12.0,
798 fragment_type: FragmentType::Word,
799 },
800 OcrTextFragment {
801 text: "Low confidence".to_string(),
802 x: 0.0,
803 y: 20.0,
804 width: 100.0,
805 height: 20.0,
806 confidence: 0.5,
807 font_size: 12.0,
808 fragment_type: FragmentType::Word,
809 },
810 ],
811 processing_time_ms: 100,
812 engine_name: "Test".to_string(),
813 language: "en".to_string(),
814 image_dimensions: (800, 600),
815 };
816
817 let high_confidence = result.filter_by_confidence(0.8);
818 assert_eq!(high_confidence.len(), 1);
819 assert_eq!(high_confidence[0].text, "High confidence");
820 }
821
822 #[test]
823 fn test_ocr_processing_result_fragments_in_region() {
824 let result = OcrProcessingResult {
825 text: "Test text".to_string(),
826 confidence: 0.8,
827 fragments: vec![
828 OcrTextFragment {
829 text: "Inside region".to_string(),
830 x: 10.0,
831 y: 10.0,
832 width: 80.0,
833 height: 20.0,
834 confidence: 0.9,
835 font_size: 12.0,
836 fragment_type: FragmentType::Word,
837 },
838 OcrTextFragment {
839 text: "Outside region".to_string(),
840 x: 200.0,
841 y: 200.0,
842 width: 80.0,
843 height: 20.0,
844 confidence: 0.9,
845 font_size: 12.0,
846 fragment_type: FragmentType::Word,
847 },
848 ],
849 processing_time_ms: 100,
850 engine_name: "Test".to_string(),
851 language: "en".to_string(),
852 image_dimensions: (800, 600),
853 };
854
855 let in_region = result.fragments_in_region(0.0, 0.0, 100.0, 100.0);
856 assert_eq!(in_region.len(), 1);
857 assert_eq!(in_region[0].text, "Inside region");
858 }
859
860 #[test]
861 fn test_ocr_processing_result_fragments_of_type() {
862 let result = OcrProcessingResult {
863 text: "Test text".to_string(),
864 confidence: 0.8,
865 fragments: vec![
866 OcrTextFragment {
867 text: "Word fragment".to_string(),
868 x: 0.0,
869 y: 0.0,
870 width: 100.0,
871 height: 20.0,
872 confidence: 0.9,
873 font_size: 12.0,
874 fragment_type: FragmentType::Word,
875 },
876 OcrTextFragment {
877 text: "Line fragment".to_string(),
878 x: 0.0,
879 y: 20.0,
880 width: 200.0,
881 height: 20.0,
882 confidence: 0.9,
883 font_size: 12.0,
884 fragment_type: FragmentType::Line,
885 },
886 ],
887 processing_time_ms: 100,
888 engine_name: "Test".to_string(),
889 language: "en".to_string(),
890 image_dimensions: (800, 600),
891 };
892
893 let words = result.fragments_of_type(FragmentType::Word);
894 assert_eq!(words.len(), 1);
895 assert_eq!(words[0].text, "Word fragment");
896
897 let lines = result.fragments_of_type(FragmentType::Line);
898 assert_eq!(lines.len(), 1);
899 assert_eq!(lines[0].text, "Line fragment");
900 }
901
902 #[test]
903 fn test_ocr_processing_result_average_confidence() {
904 let result = OcrProcessingResult {
905 text: "Test text".to_string(),
906 confidence: 0.8,
907 fragments: vec![
908 OcrTextFragment {
909 text: "Fragment 1".to_string(),
910 x: 0.0,
911 y: 0.0,
912 width: 100.0,
913 height: 20.0,
914 confidence: 0.8,
915 font_size: 12.0,
916 fragment_type: FragmentType::Word,
917 },
918 OcrTextFragment {
919 text: "Fragment 2".to_string(),
920 x: 0.0,
921 y: 20.0,
922 width: 100.0,
923 height: 20.0,
924 confidence: 0.6,
925 font_size: 12.0,
926 fragment_type: FragmentType::Word,
927 },
928 ],
929 processing_time_ms: 100,
930 engine_name: "Test".to_string(),
931 language: "en".to_string(),
932 image_dimensions: (800, 600),
933 };
934
935 let avg_confidence = result.average_confidence();
936 assert_eq!(avg_confidence, 0.7);
937 }
938
939 #[test]
940 fn test_ocr_processing_result_average_confidence_empty() {
941 let result = OcrProcessingResult {
942 text: "Test text".to_string(),
943 confidence: 0.8,
944 fragments: vec![],
945 processing_time_ms: 100,
946 engine_name: "Test".to_string(),
947 language: "en".to_string(),
948 image_dimensions: (800, 600),
949 };
950
951 let avg_confidence = result.average_confidence();
952 assert_eq!(avg_confidence, 0.0);
953 }
954
955 mod comprehensive_tests {
957 use super::*;
958 use std::collections::HashMap;
959
960 #[test]
962 fn test_ocr_error_display() {
963 let errors = vec![
964 OcrError::ProviderNotAvailable("Tesseract not installed".to_string()),
965 OcrError::UnsupportedImageFormat(ImageFormat::Tiff),
966 OcrError::InvalidImageData("Corrupted header".to_string()),
967 OcrError::ProcessingFailed("OCR engine crashed".to_string()),
968 OcrError::NetworkError("Connection timeout".to_string()),
969 OcrError::AuthenticationError("Invalid API key".to_string()),
970 OcrError::RateLimitExceeded("429 Too Many Requests".to_string()),
971 OcrError::LowConfidence("Confidence below threshold".to_string()),
972 OcrError::Configuration("Missing language pack".to_string()),
973 ];
974
975 for error in errors {
976 let display = format!("{}", error);
977 assert!(!display.is_empty());
978
979 match &error {
981 OcrError::ProviderNotAvailable(msg) => assert!(display.contains(msg)),
982 OcrError::UnsupportedImageFormat(_) => {
983 assert!(display.contains("Unsupported image format"))
984 }
985 OcrError::InvalidImageData(msg) => assert!(display.contains(msg)),
986 OcrError::ProcessingFailed(msg) => assert!(display.contains(msg)),
987 OcrError::NetworkError(msg) => assert!(display.contains(msg)),
988 OcrError::AuthenticationError(msg) => assert!(display.contains(msg)),
989 OcrError::RateLimitExceeded(msg) => assert!(display.contains(msg)),
990 OcrError::LowConfidence(msg) => assert!(display.contains(msg)),
991 OcrError::Configuration(msg) => assert!(display.contains(msg)),
992 _ => {}
993 }
994 }
995 }
996
997 #[test]
998 fn test_ocr_error_from_io_error() {
999 use std::io::{Error as IoError, ErrorKind};
1000
1001 let io_error = IoError::new(ErrorKind::NotFound, "File not found");
1002 let ocr_error: OcrError = io_error.into();
1003
1004 match ocr_error {
1005 OcrError::Io(_) => {
1006 let display = format!("{}", ocr_error);
1007 assert!(display.contains("IO error"));
1008 }
1009 _ => panic!("Expected OcrError::Io variant"),
1010 }
1011 }
1012
1013 #[test]
1014 fn test_ocr_error_debug_format() {
1015 let error = OcrError::ProcessingFailed("Test error".to_string());
1016 let debug_str = format!("{:?}", error);
1017 assert!(debug_str.contains("ProcessingFailed"));
1018 assert!(debug_str.contains("Test error"));
1019 }
1020
1021 #[test]
1023 fn test_ocr_options_custom_language() {
1024 let mut options = OcrOptions::default();
1025 assert_eq!(options.language, "en");
1026
1027 options.language = "spa+eng".to_string();
1028 assert_eq!(options.language, "spa+eng");
1029
1030 options.language = "jpn".to_string();
1031 assert_eq!(options.language, "jpn");
1032 }
1033
1034 #[test]
1035 fn test_ocr_options_confidence_threshold() {
1036 let mut options = OcrOptions::default();
1037 assert_eq!(options.min_confidence, 0.6);
1038
1039 options.min_confidence = 0.0;
1041 assert_eq!(options.min_confidence, 0.0);
1042
1043 options.min_confidence = 1.0;
1044 assert_eq!(options.min_confidence, 1.0);
1045
1046 options.min_confidence = 0.85;
1047 assert_eq!(options.min_confidence, 0.85);
1048 }
1049
1050 #[test]
1051 fn test_ocr_options_engine_specific() {
1052 let mut options = OcrOptions::default();
1053 assert!(options.engine_options.is_empty());
1054
1055 options.engine_options.insert(
1057 "tessedit_char_whitelist".to_string(),
1058 "0123456789".to_string(),
1059 );
1060 options
1061 .engine_options
1062 .insert("tessedit_ocr_engine_mode".to_string(), "3".to_string());
1063
1064 assert_eq!(options.engine_options.len(), 2);
1065 assert_eq!(
1066 options.engine_options.get("tessedit_char_whitelist"),
1067 Some(&"0123456789".to_string())
1068 );
1069 }
1070
1071 #[test]
1072 fn test_ocr_options_clone() {
1073 let mut options = OcrOptions {
1074 language: "fra".to_string(),
1075 min_confidence: 0.75,
1076 preserve_layout: false,
1077 preprocessing: ImagePreprocessing {
1078 denoise: false,
1079 deskew: true,
1080 enhance_contrast: false,
1081 sharpen: true,
1082 scale_factor: 1.5,
1083 },
1084 engine_options: HashMap::new(),
1085 timeout_seconds: 60,
1086 };
1087
1088 options
1089 .engine_options
1090 .insert("key".to_string(), "value".to_string());
1091
1092 let cloned = options.clone();
1093 assert_eq!(cloned.language, options.language);
1094 assert_eq!(cloned.min_confidence, options.min_confidence);
1095 assert_eq!(cloned.preserve_layout, options.preserve_layout);
1096 assert_eq!(
1097 cloned.preprocessing.scale_factor,
1098 options.preprocessing.scale_factor
1099 );
1100 assert_eq!(cloned.engine_options.get("key"), Some(&"value".to_string()));
1101 assert_eq!(cloned.timeout_seconds, options.timeout_seconds);
1102 }
1103
1104 #[test]
1105 fn test_ocr_options_timeout_configuration() {
1106 let mut options = OcrOptions::default();
1107 assert_eq!(options.timeout_seconds, 30);
1108
1109 options.timeout_seconds = 0; assert_eq!(options.timeout_seconds, 0);
1111
1112 options.timeout_seconds = 300; assert_eq!(options.timeout_seconds, 300);
1114 }
1115
1116 #[test]
1118 fn test_image_preprocessing_combinations() {
1119 let test_cases = vec![
1120 (true, true, true, true),
1121 (false, false, false, false),
1122 (true, false, true, false),
1123 (false, true, false, true),
1124 ];
1125
1126 for (denoise, deskew, enhance, sharpen) in test_cases {
1127 let preprocessing = ImagePreprocessing {
1128 denoise,
1129 deskew,
1130 enhance_contrast: enhance,
1131 sharpen,
1132 scale_factor: 1.0,
1133 };
1134
1135 assert_eq!(preprocessing.denoise, denoise);
1136 assert_eq!(preprocessing.deskew, deskew);
1137 assert_eq!(preprocessing.enhance_contrast, enhance);
1138 assert_eq!(preprocessing.sharpen, sharpen);
1139 }
1140 }
1141
1142 #[test]
1143 fn test_image_preprocessing_scale_factor() {
1144 let mut preprocessing = ImagePreprocessing::default();
1145 assert_eq!(preprocessing.scale_factor, 1.0);
1146
1147 preprocessing.scale_factor = 0.5;
1149 assert_eq!(preprocessing.scale_factor, 0.5);
1150
1151 preprocessing.scale_factor = 2.0;
1152 assert_eq!(preprocessing.scale_factor, 2.0);
1153
1154 preprocessing.scale_factor = 1.25;
1155 assert_eq!(preprocessing.scale_factor, 1.25);
1156 }
1157
1158 #[test]
1159 fn test_image_preprocessing_clone() {
1160 let preprocessing = ImagePreprocessing {
1161 denoise: false,
1162 deskew: true,
1163 enhance_contrast: false,
1164 sharpen: true,
1165 scale_factor: 1.5,
1166 };
1167
1168 let cloned = preprocessing.clone();
1169 assert_eq!(cloned.denoise, preprocessing.denoise);
1170 assert_eq!(cloned.deskew, preprocessing.deskew);
1171 assert_eq!(cloned.enhance_contrast, preprocessing.enhance_contrast);
1172 assert_eq!(cloned.sharpen, preprocessing.sharpen);
1173 assert_eq!(cloned.scale_factor, preprocessing.scale_factor);
1174 }
1175
1176 #[test]
1178 fn test_ocr_text_fragment_creation() {
1179 let fragment = OcrTextFragment {
1180 text: "Hello World".to_string(),
1181 x: 100.0,
1182 y: 200.0,
1183 width: 150.0,
1184 height: 25.0,
1185 confidence: 0.92,
1186 font_size: 14.0,
1187 fragment_type: FragmentType::Line,
1188 };
1189
1190 assert_eq!(fragment.text, "Hello World");
1191 assert_eq!(fragment.x, 100.0);
1192 assert_eq!(fragment.y, 200.0);
1193 assert_eq!(fragment.width, 150.0);
1194 assert_eq!(fragment.height, 25.0);
1195 assert_eq!(fragment.confidence, 0.92);
1196 assert_eq!(fragment.font_size, 14.0);
1197 assert_eq!(fragment.fragment_type, FragmentType::Line);
1198 }
1199
1200 #[test]
1201 fn test_ocr_text_fragment_clone() {
1202 let fragment = OcrTextFragment {
1203 text: "Test".to_string(),
1204 x: 50.0,
1205 y: 100.0,
1206 width: 40.0,
1207 height: 15.0,
1208 confidence: 0.88,
1209 font_size: 11.0,
1210 fragment_type: FragmentType::Word,
1211 };
1212
1213 let cloned = fragment.clone();
1214 assert_eq!(cloned.text, fragment.text);
1215 assert_eq!(cloned.x, fragment.x);
1216 assert_eq!(cloned.confidence, fragment.confidence);
1217 assert_eq!(cloned.fragment_type, fragment.fragment_type);
1218 }
1219
1220 #[test]
1221 fn test_fragment_type_copy() {
1222 let ft1 = FragmentType::Character;
1223 let ft2 = ft1; assert_eq!(ft1, ft2);
1225 assert_eq!(ft1, FragmentType::Character);
1226 }
1227
1228 #[test]
1229 fn test_fragment_position_calculations() {
1230 let fragment = OcrTextFragment {
1231 text: "Test".to_string(),
1232 x: 100.0,
1233 y: 200.0,
1234 width: 50.0,
1235 height: 20.0,
1236 confidence: 0.9,
1237 font_size: 12.0,
1238 fragment_type: FragmentType::Word,
1239 };
1240
1241 let right = fragment.x + fragment.width;
1243 let bottom = fragment.y + fragment.height;
1244
1245 assert_eq!(right, 150.0);
1246 assert_eq!(bottom, 220.0);
1247 }
1248
1249 #[test]
1251 fn test_ocr_result_complex_region_filtering() {
1252 let fragments = vec![
1253 OcrTextFragment {
1254 text: "A".to_string(),
1255 x: 10.0,
1256 y: 10.0,
1257 width: 20.0,
1258 height: 20.0,
1259 confidence: 0.9,
1260 font_size: 12.0,
1261 fragment_type: FragmentType::Character,
1262 },
1263 OcrTextFragment {
1264 text: "B".to_string(),
1265 x: 25.0,
1266 y: 10.0,
1267 width: 20.0,
1268 height: 20.0,
1269 confidence: 0.9,
1270 font_size: 12.0,
1271 fragment_type: FragmentType::Character,
1272 },
1273 OcrTextFragment {
1274 text: "C".to_string(),
1275 x: 10.0,
1276 y: 35.0,
1277 width: 20.0,
1278 height: 20.0,
1279 confidence: 0.9,
1280 font_size: 12.0,
1281 fragment_type: FragmentType::Character,
1282 },
1283 OcrTextFragment {
1284 text: "D".to_string(),
1285 x: 100.0,
1286 y: 100.0,
1287 width: 20.0,
1288 height: 20.0,
1289 confidence: 0.9,
1290 font_size: 12.0,
1291 fragment_type: FragmentType::Character,
1292 },
1293 ];
1294
1295 let result = OcrProcessingResult {
1296 text: "ABCD".to_string(),
1297 confidence: 0.9,
1298 fragments,
1299 processing_time_ms: 50,
1300 engine_name: "Test".to_string(),
1301 language: "en".to_string(),
1302 image_dimensions: (200, 200),
1303 };
1304
1305 let region1 = result.fragments_in_region(0.0, 0.0, 50.0, 50.0);
1307 assert_eq!(region1.len(), 2); let region2 = result.fragments_in_region(10.0, 10.0, 20.0, 20.0);
1311 assert_eq!(region2.len(), 1); let region3 = result.fragments_in_region(200.0, 200.0, 50.0, 50.0);
1315 assert_eq!(region3.len(), 0);
1316 }
1317
1318 #[test]
1319 fn test_ocr_result_confidence_edge_cases() {
1320 let fragments = vec![
1321 OcrTextFragment {
1322 text: "Perfect".to_string(),
1323 x: 0.0,
1324 y: 0.0,
1325 width: 100.0,
1326 height: 20.0,
1327 confidence: 1.0,
1328 font_size: 12.0,
1329 fragment_type: FragmentType::Word,
1330 },
1331 OcrTextFragment {
1332 text: "Zero".to_string(),
1333 x: 0.0,
1334 y: 25.0,
1335 width: 50.0,
1336 height: 20.0,
1337 confidence: 0.0,
1338 font_size: 12.0,
1339 fragment_type: FragmentType::Word,
1340 },
1341 OcrTextFragment {
1342 text: "Mid".to_string(),
1343 x: 0.0,
1344 y: 50.0,
1345 width: 30.0,
1346 height: 20.0,
1347 confidence: 0.5,
1348 font_size: 12.0,
1349 fragment_type: FragmentType::Word,
1350 },
1351 ];
1352
1353 let result = OcrProcessingResult {
1354 text: "Perfect Zero Mid".to_string(),
1355 confidence: 0.5,
1356 fragments,
1357 processing_time_ms: 50,
1358 engine_name: "Test".to_string(),
1359 language: "en".to_string(),
1360 image_dimensions: (200, 200),
1361 };
1362
1363 assert_eq!(result.filter_by_confidence(0.0).len(), 3);
1365 assert_eq!(result.filter_by_confidence(0.5).len(), 2);
1366 assert_eq!(result.filter_by_confidence(1.0).len(), 1);
1367 assert_eq!(result.filter_by_confidence(1.1).len(), 0);
1368 }
1369
1370 #[test]
1371 fn test_ocr_result_fragment_type_combinations() {
1372 let fragments = vec![
1373 OcrTextFragment {
1374 text: "A".to_string(),
1375 x: 0.0,
1376 y: 0.0,
1377 width: 10.0,
1378 height: 20.0,
1379 confidence: 0.9,
1380 font_size: 12.0,
1381 fragment_type: FragmentType::Character,
1382 },
1383 OcrTextFragment {
1384 text: "Word".to_string(),
1385 x: 20.0,
1386 y: 0.0,
1387 width: 40.0,
1388 height: 20.0,
1389 confidence: 0.9,
1390 font_size: 12.0,
1391 fragment_type: FragmentType::Word,
1392 },
1393 OcrTextFragment {
1394 text: "Line of text".to_string(),
1395 x: 0.0,
1396 y: 25.0,
1397 width: 100.0,
1398 height: 20.0,
1399 confidence: 0.9,
1400 font_size: 12.0,
1401 fragment_type: FragmentType::Line,
1402 },
1403 OcrTextFragment {
1404 text: "Paragraph text...".to_string(),
1405 x: 0.0,
1406 y: 50.0,
1407 width: 200.0,
1408 height: 100.0,
1409 confidence: 0.9,
1410 font_size: 12.0,
1411 fragment_type: FragmentType::Paragraph,
1412 },
1413 ];
1414
1415 let result = OcrProcessingResult {
1416 text: "Combined".to_string(),
1417 confidence: 0.9,
1418 fragments,
1419 processing_time_ms: 50,
1420 engine_name: "Test".to_string(),
1421 language: "en".to_string(),
1422 image_dimensions: (300, 300),
1423 };
1424
1425 assert_eq!(result.fragments_of_type(FragmentType::Character).len(), 1);
1426 assert_eq!(result.fragments_of_type(FragmentType::Word).len(), 1);
1427 assert_eq!(result.fragments_of_type(FragmentType::Line).len(), 1);
1428 assert_eq!(result.fragments_of_type(FragmentType::Paragraph).len(), 1);
1429 }
1430
1431 #[test]
1432 fn test_ocr_result_large_fragment_set() {
1433 let mut fragments = Vec::new();
1435 for i in 0..1000 {
1436 fragments.push(OcrTextFragment {
1437 text: format!("Fragment{}", i),
1438 x: (i % 10) as f64 * 50.0,
1439 y: (i / 10) as f64 * 20.0,
1440 width: 45.0,
1441 height: 18.0,
1442 confidence: 0.5 + (i as f64 % 50.0) / 100.0,
1443 font_size: 12.0,
1444 fragment_type: if i % 4 == 0 {
1445 FragmentType::Line
1446 } else {
1447 FragmentType::Word
1448 },
1449 });
1450 }
1451
1452 let result = OcrProcessingResult {
1453 text: "Large document".to_string(),
1454 confidence: 0.75,
1455 fragments,
1456 processing_time_ms: 500,
1457 engine_name: "Test".to_string(),
1458 language: "en".to_string(),
1459 image_dimensions: (500, 2000),
1460 };
1461
1462 let high_conf = result.filter_by_confidence(0.8);
1464 assert!(high_conf.len() < 1000);
1465
1466 let lines = result.fragments_of_type(FragmentType::Line);
1467 assert_eq!(lines.len(), 250); let region = result.fragments_in_region(0.0, 0.0, 200.0, 200.0);
1470 assert!(!region.is_empty());
1471
1472 let avg = result.average_confidence();
1473 assert!(avg > 0.5 && avg < 1.0);
1474 }
1475
1476 #[test]
1477 fn test_ocr_result_empty_handling() {
1478 let result = OcrProcessingResult {
1479 text: String::new(),
1480 confidence: 0.0,
1481 fragments: vec![],
1482 processing_time_ms: 10,
1483 engine_name: "Test".to_string(),
1484 language: "en".to_string(),
1485 image_dimensions: (0, 0),
1486 };
1487
1488 assert_eq!(result.filter_by_confidence(0.5).len(), 0);
1489 assert_eq!(result.fragments_in_region(0.0, 0.0, 100.0, 100.0).len(), 0);
1490 assert_eq!(result.fragments_of_type(FragmentType::Word).len(), 0);
1491 assert_eq!(result.average_confidence(), 0.0);
1492 }
1493
/// Every setter on the mock provider must be reflected in the result of the
/// next `process_image` call.
#[test]
fn test_mock_provider_configuration_mutations() {
    let mut mock = MockOcrProvider::new();
    mock.set_mock_text(String::from("Custom mock text"));
    mock.set_confidence(0.95);
    mock.set_processing_delay(200);

    // JPEG start-of-image + APP0 marker followed by the ASCII tag "JFIF".
    let jpeg_header = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
    let defaults = OcrOptions::default();

    let outcome = mock.process_image(&jpeg_header, &defaults).unwrap();

    assert!(outcome.text.contains("Custom mock text"));
    assert_eq!(outcome.confidence, 0.95);
    assert_eq!(outcome.processing_time_ms, 200);
}
1516
/// `set_confidence` is expected to clamp its argument into `[0.0, 1.0]` and
/// to store in-range values unchanged.
#[test]
fn test_mock_provider_confidence_clamping() {
    let mut mock = MockOcrProvider::new();

    // (requested value, value expected to be stored afterwards)
    let cases = [
        (1.5, 1.0),   // above the range -> upper bound
        (-0.5, 0.0),  // below the range -> lower bound
        (0.75, 0.75), // already inside the range -> untouched
    ];

    for &(requested, stored) in cases.iter() {
        mock.set_confidence(requested);
        assert_eq!(mock.confidence, stored);
    }
}
1533
/// The exact 8-byte PNG signature must validate; a signature that differs in
/// a single byte must be rejected.
#[test]
fn test_mock_provider_validate_png() {
    let mock = MockOcrProvider::new();

    let valid_signature: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
    let accepted = mock.validate_image_data(&valid_signature);
    assert!(accepted.is_ok());

    // Identical to the signature above except for the last byte (0x0B, not 0x0A).
    let corrupted_signature: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0B];
    let rejected = mock.validate_image_data(&corrupted_signature);
    assert!(rejected.is_err());
}
1546
/// Both TIFF byte-order headers ("II" little-endian and "MM" big-endian) must
/// pass image validation.
#[test]
fn test_mock_provider_validate_tiff() {
    // "II" followed by the magic number 42 stored little-endian.
    const LITTLE_ENDIAN_HEADER: [u8; 8] = [0x49, 0x49, 0x2A, 0x00, 0x00, 0x00, 0x00, 0x00];
    // "MM" followed by the magic number 42 stored big-endian.
    const BIG_ENDIAN_HEADER: [u8; 8] = [0x4D, 0x4D, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x00];

    let mock = MockOcrProvider::new();

    assert!(mock.validate_image_data(&LITTLE_ENDIAN_HEADER).is_ok());
    assert!(mock.validate_image_data(&BIG_ENDIAN_HEADER).is_ok());
}
1559
/// `process_page` on the mock provider must succeed for a fully scanned page
/// and return text containing the "Mock OCR" marker.
#[test]
fn test_mock_provider_process_page() {
    use crate::operations::page_analysis::PageType;

    // A page described as image-only: no text, one image, no blank space.
    let scanned_page = ContentAnalysis {
        page_number: 0,
        page_type: PageType::Scanned,
        text_ratio: 0.0,
        image_ratio: 1.0,
        blank_space_ratio: 0.0,
        text_fragment_count: 0,
        image_count: 1,
        character_count: 0,
    };

    // JPEG start-of-image + APP0 marker followed by the ASCII tag "JFIF".
    let jpeg_header = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];

    let mock = MockOcrProvider::new();
    let defaults = OcrOptions::default();

    let outcome = mock.process_page(&scanned_page, &jpeg_header, &defaults);
    let page_result = outcome.unwrap();
    assert!(page_result.text.contains("Mock OCR"));
}
1585
/// Shares one mock provider and one options value across five threads and
/// checks that every thread completes an OCR call successfully.
#[test]
fn test_mock_provider_thread_safety() {
    use std::sync::Arc;
    use std::thread;

    let shared_provider = Arc::new(MockOcrProvider::new());
    let shared_options = Arc::new(OcrOptions::default());

    // Spawn all workers up front; each one returns the index it was given.
    let workers: Vec<_> = (0..5)
        .map(|worker_index| {
            let provider = Arc::clone(&shared_provider);
            let options = Arc::clone(&shared_options);

            thread::spawn(move || {
                // JPEG start-of-image + APP0 marker followed by "JFIF".
                let jpeg_header =
                    vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
                let outcome = provider.process_image(&jpeg_header, &options).unwrap();
                assert!(outcome.text.contains("Mock OCR"));
                worker_index
            })
        })
        .collect();

    // `join` surfaces any panic (failed assertion) raised inside a worker.
    for worker in workers {
        let returned_index = worker.join().unwrap();
        assert!(returned_index < 5);
    }
}
1620
/// Pins the `Display` text of every `OcrEngine` variant.
#[test]
fn test_ocr_engine_display() {
    // (variant, text its `Display` impl is expected to produce)
    let expected_names = [
        (OcrEngine::Mock, "Mock OCR"),
        (OcrEngine::Tesseract, "Tesseract"),
        (OcrEngine::Azure, "Azure Computer Vision"),
        (OcrEngine::Aws, "AWS Textract"),
        (OcrEngine::GoogleCloud, "Google Cloud Vision"),
    ];

    for (engine, name) in expected_names.iter() {
        assert_eq!(engine.to_string(), *name);
    }
}
1630
/// `OcrEngine` values compare by variant, and a copied value equals its source.
#[test]
fn test_ocr_engine_equality() {
    // Same variant on both sides vs. two different variants.
    assert_eq!(OcrEngine::Mock, OcrEngine::Mock);
    assert_ne!(OcrEngine::Mock, OcrEngine::Tesseract);

    // `azure` is still used after the assignment below, so this line only
    // compiles because `OcrEngine` is `Copy`.
    let azure = OcrEngine::Azure;
    let azure_copy = azure;
    assert_eq!(azure, azure_copy);
}
1641
/// Checks `OcrEngine::supports_format` against the full engine x format
/// support matrix.
///
/// Each row is a fixed-size `[bool; 3]` whose columns follow the order of
/// `formats`, so a row with a missing or extra entry is a compile error
/// rather than an index panic or a silently skipped check.
#[test]
fn test_ocr_engine_format_support_matrix() {
    // Column order of every support row below.
    let formats = [ImageFormat::Jpeg, ImageFormat::Png, ImageFormat::Tiff];

    // (engine, [supports JPEG, supports PNG, supports TIFF])
    let support_matrix: [(OcrEngine, [bool; 3]); 5] = [
        (OcrEngine::Mock, [true, true, true]),
        (OcrEngine::Tesseract, [true, true, true]),
        (OcrEngine::Azure, [true, true, false]),
        (OcrEngine::Aws, [true, true, false]),
        (OcrEngine::GoogleCloud, [true, true, false]),
    ];

    for (engine, row) in support_matrix.iter() {
        // Pair every format with its expectation instead of indexing by position.
        for (format, expected) in formats.iter().zip(row.iter()) {
            assert_eq!(
                engine.supports_format(*format),
                *expected,
                "Engine {:?} format {:?} support mismatch",
                engine,
                format
            );
        }
    }
}
1676
/// Table-driven check of `validate_image_data` over accepted signatures,
/// unsupported formats, and inputs too short to carry a signature.
#[test]
fn test_validate_image_data_all_formats() {
    // (leading bytes of the input, whether validation is expected to succeed)
    let cases: [(&[u8], bool); 9] = [
        // JPEG, APP0 segment ("JF…")
        (&[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46], true),
        // JPEG, APP1 segment ("Ex…")
        (&[0xFF, 0xD8, 0xFF, 0xE1, 0x00, 0x10, 0x45, 0x78], true),
        // PNG signature
        (&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], true),
        // TIFF, little-endian ("II")
        (&[0x49, 0x49, 0x2A, 0x00, 0x00, 0x00, 0x00, 0x00], true),
        // TIFF, big-endian ("MM")
        (&[0x4D, 0x4D, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x00], true),
        // "GIF89a" - expected to be rejected
        (&[0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x00, 0x00], false),
        // "BM" (bitmap) - expected to be rejected
        (&[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], false),
        // Only two bytes of a JPEG marker - expected to be rejected
        (&[0xFF, 0xD8], false),
        // No data at all
        (&[], false),
    ];

    for &(bytes, should_succeed) in cases.iter() {
        let verdict = mock_verdict(bytes);
        // Show at most the first eight bytes in the failure message.
        let preview = &bytes[..bytes.len().min(8)];
        assert_eq!(verdict, should_succeed, "Failed for data: {:?}", preview);
    }

    /// Runs validation on a fresh mock provider and reports success as a bool.
    fn mock_verdict(bytes: &[u8]) -> bool {
        MockOcrProvider::new().validate_image_data(bytes).is_ok()
    }
}
1714
/// Builds an `OcrOptions` with every field set to a non-default value and
/// checks that each field reads back exactly as written.
#[test]
fn test_ocr_options_with_all_preprocessing() {
    let engine_options: HashMap<String, String> = [("param1", "value1"), ("param2", "value2")]
        .iter()
        .map(|&(key, value)| (key.to_string(), value.to_string()))
        .collect();

    // Every preprocessing step switched on, with a non-unit scale factor.
    let preprocessing = ImagePreprocessing {
        denoise: true,
        deskew: true,
        enhance_contrast: true,
        sharpen: true,
        scale_factor: 1.5,
    };

    let options = OcrOptions {
        language: String::from("deu+eng+fra"),
        min_confidence: 0.85,
        preserve_layout: true,
        preprocessing,
        engine_options,
        timeout_seconds: 120,
    };

    // Top-level fields.
    assert_eq!(options.language, "deu+eng+fra");
    assert_eq!(options.min_confidence, 0.85);
    assert!(options.preserve_layout);

    // Nested preprocessing fields.
    let applied = &options.preprocessing;
    assert!(applied.denoise);
    assert!(applied.deskew);
    assert!(applied.enhance_contrast);
    assert!(applied.sharpen);
    assert_eq!(applied.scale_factor, 1.5);

    // Remaining top-level fields.
    assert_eq!(options.engine_options.len(), 2);
    assert_eq!(options.timeout_seconds, 120);
}
1749
/// Computes the bounding box enclosing two fragments placed at opposite
/// corners and checks all four edges.
#[test]
fn test_fragment_boundary_calculations() {
    // Both fragments share everything except text, position and width.
    let word_at = |text: &str, x: f64, y: f64, width: f64| OcrTextFragment {
        text: text.to_string(),
        x,
        y,
        width,
        height: 20.0,
        confidence: 0.9,
        font_size: 12.0,
        fragment_type: FragmentType::Word,
    };

    let fragments = [
        word_at("TopLeft", 0.0, 0.0, 50.0),
        word_at("BottomRight", 550.0, 770.0, 60.0),
    ];

    // One pass over the fragments, widening the box (left, top, right, bottom)
    // from an "inverted" start so that the first fragment always replaces it.
    let inverted_box = (
        f64::INFINITY,
        f64::INFINITY,
        f64::NEG_INFINITY,
        f64::NEG_INFINITY,
    );
    let (min_x, min_y, max_x, max_y) =
        fragments
            .iter()
            .fold(inverted_box, |(left, top, right, bottom), fragment| {
                (
                    f64::min(left, fragment.x),
                    f64::min(top, fragment.y),
                    f64::max(right, fragment.x + fragment.width),
                    f64::max(bottom, fragment.y + fragment.height),
                )
            });

    assert_eq!(min_x, 0.0);
    assert_eq!(min_y, 0.0);
    assert_eq!(max_x, 610.0); // 550 + 60
    assert_eq!(max_y, 790.0); // 770 + 20
}
1792
/// Checks that `OcrError` messages keep their context: a converted I/O error
/// is labelled as such, and a processing failure keeps its detail text.
#[test]
fn test_error_chain_context() {
    use std::io::{Error as IoError, ErrorKind};

    // `std::io::Error` converts into `OcrError` through its `From` impl.
    let wrapped_io: OcrError =
        IoError::new(ErrorKind::PermissionDenied, "Access denied to image file").into();
    assert!(wrapped_io.to_string().contains("IO error"));

    let failure =
        OcrError::ProcessingFailed(String::from("Failed to process page 5: insufficient memory"));
    let rendered = failure.to_string();
    for detail in ["page 5", "insufficient memory"].iter() {
        assert!(rendered.contains(*detail));
    }
}
1812
/// Queries one shared `OcrProcessingResult` from ten threads at once and
/// checks that every thread ran to completion.
#[test]
fn test_concurrent_result_processing() {
    use std::sync::{Arc, Mutex};
    use std::thread;

    // The two fragments differ only in text, vertical position and confidence.
    let word = |text: &str, y: f64, confidence: f64| OcrTextFragment {
        text: text.to_string(),
        x: 0.0,
        y,
        width: 100.0,
        height: 20.0,
        confidence,
        font_size: 12.0,
        fragment_type: FragmentType::Word,
    };

    let shared_result = Arc::new(OcrProcessingResult {
        text: String::from("Concurrent test"),
        confidence: 0.85,
        fragments: vec![word("Fragment1", 0.0, 0.9), word("Fragment2", 25.0, 0.8)],
        processing_time_ms: 100,
        engine_name: String::from("Test"),
        language: String::from("en"),
        image_dimensions: (200, 100),
    });

    // Incremented once by each worker after it has finished its queries.
    let completed = Arc::new(Mutex::new(0));

    let workers: Vec<_> = (0..10)
        .map(|_| {
            let result_view = Arc::clone(&shared_result);
            let tally = Arc::clone(&completed);

            thread::spawn(move || {
                // Return values are irrelevant; the calls only have to finish.
                let _ = result_view.filter_by_confidence(0.85);
                let _ = result_view.fragments_in_region(0.0, 0.0, 200.0, 100.0);
                let _ = result_view.average_confidence();

                *tally.lock().unwrap() += 1;
            })
        })
        .collect();

    for worker in workers {
        worker.join().unwrap();
    }

    assert_eq!(*completed.lock().unwrap(), 10);
}
1878 }
1879}