use crate::graphics::ImageFormat;
use crate::operations::page_analysis::ContentAnalysis;
use std::fmt;

/// Convenience result type for OCR operations.
pub type OcrResult<T> = Result<T, OcrError>;

/// Errors that can occur during OCR processing.
#[derive(Debug, thiserror::Error)]
pub enum OcrError {
    #[error("OCR provider not available: {0}")]
    ProviderNotAvailable(String),

    #[error("Unsupported image format: {0:?}")]
    UnsupportedImageFormat(ImageFormat),

    #[error("Invalid image data: {0}")]
    InvalidImageData(String),

    #[error("OCR processing failed: {0}")]
    ProcessingFailed(String),

    #[error("Network error: {0}")]
    NetworkError(String),

    #[error("Authentication error: {0}")]
    AuthenticationError(String),

    #[error("Rate limit exceeded: {0}")]
    RateLimitExceeded(String),

    #[error("Low confidence results: {0}")]
    LowConfidence(String),

    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    #[error("Configuration error: {0}")]
    Configuration(String),
}

/// Configuration options for OCR processing.
#[derive(Debug, Clone)]
pub struct OcrOptions {
    /// Language hint passed to the OCR engine (e.g. "en").
    pub language: String,

    /// Minimum confidence (0.0 to 1.0) required to accept recognized text.
    pub min_confidence: f64,

    /// Whether to preserve the original text layout.
    pub preserve_layout: bool,

    /// Image preprocessing applied before recognition.
    pub preprocessing: ImagePreprocessing,

    /// Engine-specific options as key/value pairs.
    pub engine_options: std::collections::HashMap<String, String>,

    /// Processing timeout in seconds.
    pub timeout_seconds: u32,
}

impl Default for OcrOptions {
    fn default() -> Self {
        Self {
            language: "en".to_string(),
            min_confidence: 0.6,
            preserve_layout: true,
            preprocessing: ImagePreprocessing::default(),
            engine_options: std::collections::HashMap::new(),
            timeout_seconds: 30,
        }
    }
}
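
// Illustrative sketch: overriding selected options with struct update syntax.
// The language code shown here is only an example; accepted values depend on
// the engine behind the provider.
//
//     let options = OcrOptions {
//         language: "es".to_string(),
//         min_confidence: 0.8,
//         ..OcrOptions::default()
//     };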

/// Image preprocessing options applied before OCR.
#[derive(Debug, Clone)]
pub struct ImagePreprocessing {
    /// Apply noise reduction.
    pub denoise: bool,

    /// Correct skew in the scanned image.
    pub deskew: bool,

    /// Enhance contrast before recognition.
    pub enhance_contrast: bool,

    /// Apply a sharpening filter.
    pub sharpen: bool,

    /// Scale factor applied to the image (1.0 keeps the original size).
    pub scale_factor: f64,
}

impl Default for ImagePreprocessing {
    fn default() -> Self {
        Self {
            denoise: true,
            deskew: true,
            enhance_contrast: true,
            sharpen: false,
            scale_factor: 1.0,
        }
    }
}
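
// Illustrative sketch: upscaling and sharpening a low-resolution scan while
// keeping the remaining defaults shown above.
//
//     let preprocessing = ImagePreprocessing {
//         sharpen: true,
//         scale_factor: 2.0,
//         ..ImagePreprocessing::default()
//     };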

/// A fragment of text recognized by OCR, with position and confidence.
#[derive(Debug, Clone)]
pub struct OcrTextFragment {
    /// The recognized text.
    pub text: String,

    /// Horizontal position of the fragment.
    pub x: f64,

    /// Vertical position of the fragment.
    pub y: f64,

    /// Width of the fragment.
    pub width: f64,

    /// Height of the fragment.
    pub height: f64,

    /// Recognition confidence (0.0 to 1.0).
    pub confidence: f64,

    /// Font size of the recognized text.
    pub font_size: f64,

    /// Granularity of this fragment.
    pub fragment_type: FragmentType,
}

/// Granularity of an OCR text fragment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FragmentType {
    /// A single character.
    Character,
    /// A single word.
    Word,
    /// A line of text.
    Line,
    /// A paragraph of text.
    Paragraph,
}

/// The result of an OCR operation.
#[derive(Debug, Clone)]
pub struct OcrProcessingResult {
    /// The full extracted text.
    pub text: String,

    /// Overall confidence of the result (0.0 to 1.0).
    pub confidence: f64,

    /// Individual text fragments with position information.
    pub fragments: Vec<OcrTextFragment>,

    /// Time spent processing, in milliseconds.
    pub processing_time_ms: u64,

    /// Name of the engine that produced the result.
    pub engine_name: String,

    /// Language used for recognition.
    pub language: String,

    /// Width and height of the processed image.
    pub image_dimensions: (u32, u32),
}

impl OcrProcessingResult {
    /// Returns the fragments whose confidence is at least `min_confidence`.
    pub fn filter_by_confidence(&self, min_confidence: f64) -> Vec<&OcrTextFragment> {
        self.fragments
            .iter()
            .filter(|fragment| fragment.confidence >= min_confidence)
            .collect()
    }

    /// Returns the fragments fully contained in the given rectangular region.
    pub fn fragments_in_region(
        &self,
        x: f64,
        y: f64,
        width: f64,
        height: f64,
    ) -> Vec<&OcrTextFragment> {
        self.fragments
            .iter()
            .filter(|fragment| {
                fragment.x >= x
                    && fragment.y >= y
                    && fragment.x + fragment.width <= x + width
                    && fragment.y + fragment.height <= y + height
            })
            .collect()
    }

    /// Returns the fragments of the given type.
    pub fn fragments_of_type(&self, fragment_type: FragmentType) -> Vec<&OcrTextFragment> {
        self.fragments
            .iter()
            .filter(|fragment| fragment.fragment_type == fragment_type)
            .collect()
    }

    /// Returns the average confidence across all fragments, or 0.0 if there are none.
    pub fn average_confidence(&self) -> f64 {
        if self.fragments.is_empty() {
            return 0.0;
        }

        let sum: f64 = self.fragments.iter().map(|f| f.confidence).sum();
        sum / self.fragments.len() as f64
    }
}
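
// Illustrative sketch: post-processing an `OcrProcessingResult` (assumes a
// `result` value already obtained from an `OcrProvider`; the region used is
// only an example).
//
//     let confident = result.filter_by_confidence(0.8);
//     let header_area = result.fragments_in_region(0.0, 0.0, 612.0, 100.0);
//     let words = result.fragments_of_type(FragmentType::Word);
//     println!("average confidence: {:.2}", result.average_confidence());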

/// Supported OCR engines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OcrEngine {
    /// Mock engine for testing.
    Mock,
    /// Tesseract OCR.
    Tesseract,
    /// Azure Computer Vision.
    Azure,
    /// AWS Textract.
    Aws,
    /// Google Cloud Vision.
    GoogleCloud,
}

impl OcrEngine {
    /// Returns the human-readable name of the engine.
    pub fn name(&self) -> &'static str {
        match self {
            OcrEngine::Mock => "Mock OCR",
            OcrEngine::Tesseract => "Tesseract",
            OcrEngine::Azure => "Azure Computer Vision",
            OcrEngine::Aws => "AWS Textract",
            OcrEngine::GoogleCloud => "Google Cloud Vision",
        }
    }

    /// Returns `true` if the engine can process images in the given format.
    pub fn supports_format(&self, format: ImageFormat) -> bool {
        match self {
            OcrEngine::Mock => true,
            OcrEngine::Tesseract => matches!(
                format,
                ImageFormat::Jpeg | ImageFormat::Png | ImageFormat::Tiff
            ),
            OcrEngine::Azure => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
            OcrEngine::Aws => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
            OcrEngine::GoogleCloud => matches!(format, ImageFormat::Jpeg | ImageFormat::Png),
        }
    }
}

impl fmt::Display for OcrEngine {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.name())
    }
}
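
// Illustrative sketch: checking format support before dispatching work to an
// engine.
//
//     let engine = OcrEngine::Tesseract;
//     if engine.supports_format(ImageFormat::Tiff) {
//         println!("{} can handle TIFF input", engine);
//     }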

/// Trait implemented by OCR providers. Providers must be thread-safe
/// (`Send + Sync`).
pub trait OcrProvider: Send + Sync {
    /// Processes raw image data and returns the recognized text.
    fn process_image(
        &self,
        image_data: &[u8],
        options: &OcrOptions,
    ) -> OcrResult<OcrProcessingResult>;

    /// Processes a page given its content analysis and rendered image data.
    /// The default implementation ignores the analysis and delegates to
    /// [`process_image`](Self::process_image).
    fn process_page(
        &self,
        _page_analysis: &ContentAnalysis,
        page_data: &[u8],
        options: &OcrOptions,
    ) -> OcrResult<OcrProcessingResult> {
        self.process_image(page_data, options)
    }

    /// Returns the image formats supported by this provider.
    fn supported_formats(&self) -> Vec<ImageFormat>;

    /// Returns the name of the underlying engine.
    fn engine_name(&self) -> &str;

    /// Returns the engine type.
    fn engine_type(&self) -> OcrEngine;

    /// Returns `true` if this provider supports the given image format.
    fn supports_format(&self, format: ImageFormat) -> bool {
        self.supported_formats().contains(&format)
    }

    /// Validates raw image data by checking its magic bytes and format support.
    fn validate_image_data(&self, image_data: &[u8]) -> OcrResult<()> {
        if image_data.len() < 8 {
            return Err(OcrError::InvalidImageData(
                "Image data too short".to_string(),
            ));
        }

        let format = if image_data.starts_with(b"\xFF\xD8\xFF") {
            ImageFormat::Jpeg
        } else if image_data.starts_with(b"\x89PNG\r\n\x1a\n") {
            ImageFormat::Png
        } else if image_data.starts_with(b"II\x2A\x00") || image_data.starts_with(b"MM\x00\x2A") {
            ImageFormat::Tiff
        } else {
            return Err(OcrError::InvalidImageData(
                "Unrecognized image format".to_string(),
            ));
        };

        if !self.supports_format(format) {
            return Err(OcrError::UnsupportedImageFormat(format));
        }

        Ok(())
    }
}
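
// Illustrative sketch (hypothetical provider, not part of this crate): a
// minimal implementation only needs `process_image`, `supported_formats`,
// `engine_name`, and `engine_type`; the remaining trait methods have default
// implementations.
//
//     struct StubProvider;
//
//     impl OcrProvider for StubProvider {
//         fn process_image(
//             &self,
//             image_data: &[u8],
//             options: &OcrOptions,
//         ) -> OcrResult<OcrProcessingResult> {
//             self.validate_image_data(image_data)?;
//             Ok(OcrProcessingResult {
//                 text: String::new(),
//                 confidence: 1.0,
//                 fragments: Vec::new(),
//                 processing_time_ms: 0,
//                 engine_name: self.engine_name().to_string(),
//                 language: options.language.clone(),
//                 image_dimensions: (0, 0),
//             })
//         }
//
//         fn supported_formats(&self) -> Vec<ImageFormat> {
//             vec![ImageFormat::Jpeg, ImageFormat::Png]
//         }
//
//         fn engine_name(&self) -> &str {
//             "Stub"
//         }
//
//         fn engine_type(&self) -> OcrEngine {
//             OcrEngine::Mock
//         }
//     }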

/// Mock OCR provider that returns configurable canned results. Useful for
/// tests and development without a real OCR engine.
#[derive(Clone)]
pub struct MockOcrProvider {
    /// Confidence reported for generated results.
    confidence: f64,
    /// Text returned for every processed image.
    mock_text: String,
    /// Simulated processing delay in milliseconds.
    processing_delay_ms: u64,
}

impl MockOcrProvider {
    /// Creates a mock provider with default text, confidence, and delay.
    pub fn new() -> Self {
        Self {
            confidence: 0.85,
            mock_text: "Mock OCR extracted text from scanned image".to_string(),
            processing_delay_ms: 100,
        }
    }

    /// Creates a mock provider with the given text and confidence.
    pub fn with_text_and_confidence(text: String, confidence: f64) -> Self {
        Self {
            confidence,
            mock_text: text,
            processing_delay_ms: 100,
        }
    }

    /// Sets the text returned by the mock provider.
    pub fn set_mock_text(&mut self, text: String) {
        self.mock_text = text;
    }

    /// Sets the reported confidence, clamped to the range 0.0 to 1.0.
    pub fn set_confidence(&mut self, confidence: f64) {
        self.confidence = confidence.clamp(0.0, 1.0);
    }

    /// Sets the simulated processing delay in milliseconds.
    pub fn set_processing_delay(&mut self, delay_ms: u64) {
        self.processing_delay_ms = delay_ms;
    }
}

impl Default for MockOcrProvider {
    fn default() -> Self {
        Self::new()
    }
}

impl OcrProvider for MockOcrProvider {
    fn process_image(
        &self,
        image_data: &[u8],
        options: &OcrOptions,
    ) -> OcrResult<OcrProcessingResult> {
        self.validate_image_data(image_data)?;

        std::thread::sleep(std::time::Duration::from_millis(self.processing_delay_ms));

        let fragments = vec![
            OcrTextFragment {
                text: self.mock_text.clone(),
                x: 50.0,
                y: 700.0,
                width: 200.0,
                height: 20.0,
                confidence: self.confidence,
                font_size: 12.0,
                fragment_type: FragmentType::Line,
            },
            OcrTextFragment {
                text: "Additional mock text".to_string(),
                x: 50.0,
                y: 680.0,
                width: 150.0,
                height: 20.0,
                confidence: self.confidence * 0.9,
                font_size: 12.0,
                fragment_type: FragmentType::Line,
            },
        ];

        Ok(OcrProcessingResult {
            text: format!("{}\nAdditional mock text", self.mock_text),
            confidence: self.confidence,
            fragments,
            processing_time_ms: self.processing_delay_ms,
            engine_name: "Mock OCR".to_string(),
            language: options.language.clone(),
            image_dimensions: (800, 600),
        })
    }

    fn supported_formats(&self) -> Vec<ImageFormat> {
        vec![ImageFormat::Jpeg, ImageFormat::Png, ImageFormat::Tiff]
    }

    fn engine_name(&self) -> &str {
        "Mock OCR"
    }

    fn engine_type(&self) -> OcrEngine {
        OcrEngine::Mock
    }
}
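
// Illustrative sketch: running the mock provider end to end inside a function
// that returns `OcrResult<()>`. The JPEG bytes are a fabricated header used
// only to pass validation.
//
//     let provider = MockOcrProvider::new();
//     let options = OcrOptions::default();
//     let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
//     let result = provider.process_image(&jpeg_data, &options)?;
//     assert_eq!(result.engine_name, "Mock OCR");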

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ocr_options_default() {
        let options = OcrOptions::default();
        assert_eq!(options.language, "en");
        assert_eq!(options.min_confidence, 0.6);
        assert!(options.preserve_layout);
        assert_eq!(options.timeout_seconds, 30);
    }

    #[test]
    fn test_image_preprocessing_default() {
        let preprocessing = ImagePreprocessing::default();
        assert!(preprocessing.denoise);
        assert!(preprocessing.deskew);
        assert!(preprocessing.enhance_contrast);
        assert!(!preprocessing.sharpen);
        assert_eq!(preprocessing.scale_factor, 1.0);
    }

    #[test]
    fn test_ocr_engine_name() {
        assert_eq!(OcrEngine::Mock.name(), "Mock OCR");
        assert_eq!(OcrEngine::Tesseract.name(), "Tesseract");
        assert_eq!(OcrEngine::Azure.name(), "Azure Computer Vision");
    }

    #[test]
    fn test_ocr_engine_supports_format() {
        assert!(OcrEngine::Mock.supports_format(ImageFormat::Jpeg));
        assert!(OcrEngine::Mock.supports_format(ImageFormat::Png));
        assert!(OcrEngine::Mock.supports_format(ImageFormat::Tiff));

        assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Jpeg));
        assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Png));
        assert!(OcrEngine::Tesseract.supports_format(ImageFormat::Tiff));

        assert!(OcrEngine::Azure.supports_format(ImageFormat::Jpeg));
        assert!(OcrEngine::Azure.supports_format(ImageFormat::Png));
        assert!(!OcrEngine::Azure.supports_format(ImageFormat::Tiff));
    }

    #[test]
    fn test_fragment_type_equality() {
        assert_eq!(FragmentType::Word, FragmentType::Word);
        assert_ne!(FragmentType::Word, FragmentType::Line);
        assert_ne!(FragmentType::Character, FragmentType::Paragraph);
    }

    #[test]
    fn test_mock_ocr_provider_creation() {
        let provider = MockOcrProvider::new();
        assert_eq!(provider.confidence, 0.85);
        assert!(provider.mock_text.contains("Mock OCR"));
        assert_eq!(provider.processing_delay_ms, 100);
    }

    #[test]
    fn test_mock_ocr_provider_with_custom_text() {
        let custom_text = "Custom mock text".to_string();
        let provider = MockOcrProvider::with_text_and_confidence(custom_text.clone(), 0.95);
        assert_eq!(provider.mock_text, custom_text);
        assert_eq!(provider.confidence, 0.95);
    }

    #[test]
    fn test_mock_ocr_provider_process_image() {
        let provider = MockOcrProvider::new();
        let options = OcrOptions::default();

        let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];

        let result = provider.process_image(&jpeg_data, &options).unwrap();
        assert!(result.text.contains("Mock OCR"));
        assert_eq!(result.confidence, 0.85);
        assert!(!result.fragments.is_empty());
        assert_eq!(result.engine_name, "Mock OCR");
        assert_eq!(result.language, "en");
    }

    #[test]
    fn test_mock_ocr_provider_supported_formats() {
        let provider = MockOcrProvider::new();
        let formats = provider.supported_formats();
        assert!(formats.contains(&ImageFormat::Jpeg));
        assert!(formats.contains(&ImageFormat::Png));
        assert!(formats.contains(&ImageFormat::Tiff));
    }

    #[test]
    fn test_mock_ocr_provider_engine_info() {
        let provider = MockOcrProvider::new();
        assert_eq!(provider.engine_name(), "Mock OCR");
        assert_eq!(provider.engine_type(), OcrEngine::Mock);
    }

    #[test]
    fn test_mock_ocr_provider_supports_format() {
        let provider = MockOcrProvider::new();
        assert!(provider.supports_format(ImageFormat::Jpeg));
        assert!(provider.supports_format(ImageFormat::Png));
        assert!(provider.supports_format(ImageFormat::Tiff));
    }

    #[test]
    fn test_mock_ocr_provider_validate_image_data() {
        let provider = MockOcrProvider::new();

        let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
        assert!(provider.validate_image_data(&jpeg_data).is_ok());

        let short_data = vec![0xFF, 0xD8];
        assert!(provider.validate_image_data(&short_data).is_err());

        let invalid_data = vec![0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09];
        assert!(provider.validate_image_data(&invalid_data).is_err());
    }

    #[test]
    fn test_ocr_processing_result_filter_by_confidence() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![
                OcrTextFragment {
                    text: "High confidence".to_string(),
                    x: 0.0,
                    y: 0.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Low confidence".to_string(),
                    x: 0.0,
                    y: 20.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.5,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
            ],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        let high_confidence = result.filter_by_confidence(0.8);
        assert_eq!(high_confidence.len(), 1);
        assert_eq!(high_confidence[0].text, "High confidence");
    }

    #[test]
    fn test_ocr_processing_result_fragments_in_region() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![
                OcrTextFragment {
                    text: "Inside region".to_string(),
                    x: 10.0,
                    y: 10.0,
                    width: 80.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Outside region".to_string(),
                    x: 200.0,
                    y: 200.0,
                    width: 80.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
            ],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        let in_region = result.fragments_in_region(0.0, 0.0, 100.0, 100.0);
        assert_eq!(in_region.len(), 1);
        assert_eq!(in_region[0].text, "Inside region");
    }

    #[test]
    fn test_ocr_processing_result_fragments_of_type() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![
                OcrTextFragment {
                    text: "Word fragment".to_string(),
                    x: 0.0,
                    y: 0.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Line fragment".to_string(),
                    x: 0.0,
                    y: 20.0,
                    width: 200.0,
                    height: 20.0,
                    confidence: 0.9,
                    font_size: 12.0,
                    fragment_type: FragmentType::Line,
                },
            ],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        let words = result.fragments_of_type(FragmentType::Word);
        assert_eq!(words.len(), 1);
        assert_eq!(words[0].text, "Word fragment");

        let lines = result.fragments_of_type(FragmentType::Line);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].text, "Line fragment");
    }

    #[test]
    fn test_ocr_processing_result_average_confidence() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![
                OcrTextFragment {
                    text: "Fragment 1".to_string(),
                    x: 0.0,
                    y: 0.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.8,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Fragment 2".to_string(),
                    x: 0.0,
                    y: 20.0,
                    width: 100.0,
                    height: 20.0,
                    confidence: 0.6,
                    font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
            ],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        let avg_confidence = result.average_confidence();
        assert_eq!(avg_confidence, 0.7);
    }

    #[test]
    fn test_ocr_processing_result_average_confidence_empty() {
        let result = OcrProcessingResult {
            text: "Test text".to_string(),
            confidence: 0.8,
            fragments: vec![],
            processing_time_ms: 100,
            engine_name: "Test".to_string(),
            language: "en".to_string(),
            image_dimensions: (800, 600),
        };

        let avg_confidence = result.average_confidence();
        assert_eq!(avg_confidence, 0.0);
    }

    mod comprehensive_tests {
        use super::*;
        use std::collections::HashMap;

        #[test]
        fn test_ocr_error_display() {
            let errors = vec![
                OcrError::ProviderNotAvailable("Tesseract not installed".to_string()),
                OcrError::UnsupportedImageFormat(ImageFormat::Tiff),
                OcrError::InvalidImageData("Corrupted header".to_string()),
                OcrError::ProcessingFailed("OCR engine crashed".to_string()),
                OcrError::NetworkError("Connection timeout".to_string()),
                OcrError::AuthenticationError("Invalid API key".to_string()),
                OcrError::RateLimitExceeded("429 Too Many Requests".to_string()),
                OcrError::LowConfidence("Confidence below threshold".to_string()),
                OcrError::Configuration("Missing language pack".to_string()),
            ];

            for error in errors {
                let display = format!("{}", error);
                assert!(!display.is_empty());

                match &error {
                    OcrError::ProviderNotAvailable(msg) => assert!(display.contains(msg)),
                    OcrError::UnsupportedImageFormat(_) => {
                        assert!(display.contains("Unsupported image format"))
                    }
                    OcrError::InvalidImageData(msg) => assert!(display.contains(msg)),
                    OcrError::ProcessingFailed(msg) => assert!(display.contains(msg)),
                    OcrError::NetworkError(msg) => assert!(display.contains(msg)),
                    OcrError::AuthenticationError(msg) => assert!(display.contains(msg)),
                    OcrError::RateLimitExceeded(msg) => assert!(display.contains(msg)),
                    OcrError::LowConfidence(msg) => assert!(display.contains(msg)),
                    OcrError::Configuration(msg) => assert!(display.contains(msg)),
                    _ => {}
                }
            }
        }

        #[test]
        fn test_ocr_error_from_io_error() {
            use std::io::{Error as IoError, ErrorKind};

            let io_error = IoError::new(ErrorKind::NotFound, "File not found");
            let ocr_error: OcrError = io_error.into();

            match ocr_error {
                OcrError::Io(_) => {
                    let display = format!("{}", ocr_error);
                    assert!(display.contains("IO error"));
                }
                _ => panic!("Expected OcrError::Io variant"),
            }
        }

        #[test]
        fn test_ocr_error_debug_format() {
            let error = OcrError::ProcessingFailed("Test error".to_string());
            let debug_str = format!("{:?}", error);
            assert!(debug_str.contains("ProcessingFailed"));
            assert!(debug_str.contains("Test error"));
        }

        #[test]
        fn test_ocr_options_custom_language() {
            let mut options = OcrOptions::default();
            assert_eq!(options.language, "en");

            options.language = "spa+eng".to_string();
            assert_eq!(options.language, "spa+eng");

            options.language = "jpn".to_string();
            assert_eq!(options.language, "jpn");
        }

        #[test]
        fn test_ocr_options_confidence_threshold() {
            let mut options = OcrOptions::default();
            assert_eq!(options.min_confidence, 0.6);

            options.min_confidence = 0.0;
            assert_eq!(options.min_confidence, 0.0);

            options.min_confidence = 1.0;
            assert_eq!(options.min_confidence, 1.0);

            options.min_confidence = 0.85;
            assert_eq!(options.min_confidence, 0.85);
        }

        #[test]
        fn test_ocr_options_engine_specific() {
            let mut options = OcrOptions::default();
            assert!(options.engine_options.is_empty());

            options
                .engine_options
                .insert("tessedit_char_whitelist".to_string(), "0123456789".to_string());
            options
                .engine_options
                .insert("tessedit_ocr_engine_mode".to_string(), "3".to_string());

            assert_eq!(options.engine_options.len(), 2);
            assert_eq!(
                options.engine_options.get("tessedit_char_whitelist"),
                Some(&"0123456789".to_string())
            );
        }

        #[test]
        fn test_ocr_options_clone() {
            let mut options = OcrOptions {
                language: "fra".to_string(),
                min_confidence: 0.75,
                preserve_layout: false,
                preprocessing: ImagePreprocessing {
                    denoise: false,
                    deskew: true,
                    enhance_contrast: false,
                    sharpen: true,
                    scale_factor: 1.5,
                },
                engine_options: HashMap::new(),
                timeout_seconds: 60,
            };

            options.engine_options.insert("key".to_string(), "value".to_string());

            let cloned = options.clone();
            assert_eq!(cloned.language, options.language);
            assert_eq!(cloned.min_confidence, options.min_confidence);
            assert_eq!(cloned.preserve_layout, options.preserve_layout);
            assert_eq!(
                cloned.preprocessing.scale_factor,
                options.preprocessing.scale_factor
            );
            assert_eq!(cloned.engine_options.get("key"), Some(&"value".to_string()));
            assert_eq!(cloned.timeout_seconds, options.timeout_seconds);
        }

        #[test]
        fn test_ocr_options_timeout_configuration() {
            let mut options = OcrOptions::default();
            assert_eq!(options.timeout_seconds, 30);

            options.timeout_seconds = 0;
            assert_eq!(options.timeout_seconds, 0);

            options.timeout_seconds = 300;
            assert_eq!(options.timeout_seconds, 300);
        }

        #[test]
        fn test_image_preprocessing_combinations() {
            let test_cases = vec![
                (true, true, true, true),
                (false, false, false, false),
                (true, false, true, false),
                (false, true, false, true),
            ];

            for (denoise, deskew, enhance, sharpen) in test_cases {
                let preprocessing = ImagePreprocessing {
                    denoise,
                    deskew,
                    enhance_contrast: enhance,
                    sharpen,
                    scale_factor: 1.0,
                };

                assert_eq!(preprocessing.denoise, denoise);
                assert_eq!(preprocessing.deskew, deskew);
                assert_eq!(preprocessing.enhance_contrast, enhance);
                assert_eq!(preprocessing.sharpen, sharpen);
            }
        }

        #[test]
        fn test_image_preprocessing_scale_factor() {
            let mut preprocessing = ImagePreprocessing::default();
            assert_eq!(preprocessing.scale_factor, 1.0);

            preprocessing.scale_factor = 0.5;
            assert_eq!(preprocessing.scale_factor, 0.5);

            preprocessing.scale_factor = 2.0;
            assert_eq!(preprocessing.scale_factor, 2.0);

            preprocessing.scale_factor = 1.25;
            assert_eq!(preprocessing.scale_factor, 1.25);
        }

        #[test]
        fn test_image_preprocessing_clone() {
            let preprocessing = ImagePreprocessing {
                denoise: false,
                deskew: true,
                enhance_contrast: false,
                sharpen: true,
                scale_factor: 1.5,
            };

            let cloned = preprocessing.clone();
            assert_eq!(cloned.denoise, preprocessing.denoise);
            assert_eq!(cloned.deskew, preprocessing.deskew);
            assert_eq!(cloned.enhance_contrast, preprocessing.enhance_contrast);
            assert_eq!(cloned.sharpen, preprocessing.sharpen);
            assert_eq!(cloned.scale_factor, preprocessing.scale_factor);
        }

        #[test]
        fn test_ocr_text_fragment_creation() {
            let fragment = OcrTextFragment {
                text: "Hello World".to_string(),
                x: 100.0,
                y: 200.0,
                width: 150.0,
                height: 25.0,
                confidence: 0.92,
                font_size: 14.0,
                fragment_type: FragmentType::Line,
            };

            assert_eq!(fragment.text, "Hello World");
            assert_eq!(fragment.x, 100.0);
            assert_eq!(fragment.y, 200.0);
            assert_eq!(fragment.width, 150.0);
            assert_eq!(fragment.height, 25.0);
            assert_eq!(fragment.confidence, 0.92);
            assert_eq!(fragment.font_size, 14.0);
            assert_eq!(fragment.fragment_type, FragmentType::Line);
        }

        #[test]
        fn test_ocr_text_fragment_clone() {
            let fragment = OcrTextFragment {
                text: "Test".to_string(),
                x: 50.0,
                y: 100.0,
                width: 40.0,
                height: 15.0,
                confidence: 0.88,
                font_size: 11.0,
                fragment_type: FragmentType::Word,
            };

            let cloned = fragment.clone();
            assert_eq!(cloned.text, fragment.text);
            assert_eq!(cloned.x, fragment.x);
            assert_eq!(cloned.confidence, fragment.confidence);
            assert_eq!(cloned.fragment_type, fragment.fragment_type);
        }

        #[test]
        fn test_fragment_type_copy() {
            let ft1 = FragmentType::Character;
            let ft2 = ft1;
            assert_eq!(ft1, ft2);
            assert_eq!(ft1, FragmentType::Character);
        }

        #[test]
        fn test_fragment_position_calculations() {
            let fragment = OcrTextFragment {
                text: "Test".to_string(),
                x: 100.0,
                y: 200.0,
                width: 50.0,
                height: 20.0,
                confidence: 0.9,
                font_size: 12.0,
                fragment_type: FragmentType::Word,
            };

            let right = fragment.x + fragment.width;
            let bottom = fragment.y + fragment.height;

            assert_eq!(right, 150.0);
            assert_eq!(bottom, 220.0);
        }

        #[test]
        fn test_ocr_result_complex_region_filtering() {
            let fragments = vec![
                OcrTextFragment {
                    text: "A".to_string(),
                    x: 10.0, y: 10.0, width: 20.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Character,
                },
                OcrTextFragment {
                    text: "B".to_string(),
                    x: 25.0, y: 10.0, width: 20.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Character,
                },
                OcrTextFragment {
                    text: "C".to_string(),
                    x: 10.0, y: 35.0, width: 20.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Character,
                },
                OcrTextFragment {
                    text: "D".to_string(),
                    x: 100.0, y: 100.0, width: 20.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Character,
                },
            ];

            let result = OcrProcessingResult {
                text: "ABCD".to_string(),
                confidence: 0.9,
                fragments,
                processing_time_ms: 50,
                engine_name: "Test".to_string(),
                language: "en".to_string(),
                image_dimensions: (200, 200),
            };

            // A and B fit entirely inside the 50x50 region; C extends below it.
            let region1 = result.fragments_in_region(0.0, 0.0, 50.0, 50.0);
            assert_eq!(region1.len(), 2);

            // Only A fits exactly inside the 20x20 region at (10, 10).
            let region2 = result.fragments_in_region(10.0, 10.0, 20.0, 20.0);
            assert_eq!(region2.len(), 1);

            // No fragment lies in the region starting at (200, 200).
            let region3 = result.fragments_in_region(200.0, 200.0, 50.0, 50.0);
            assert_eq!(region3.len(), 0);
        }

        #[test]
        fn test_ocr_result_confidence_edge_cases() {
            let fragments = vec![
                OcrTextFragment {
                    text: "Perfect".to_string(),
                    x: 0.0, y: 0.0, width: 100.0, height: 20.0,
                    confidence: 1.0, font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Zero".to_string(),
                    x: 0.0, y: 25.0, width: 50.0, height: 20.0,
                    confidence: 0.0, font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Mid".to_string(),
                    x: 0.0, y: 50.0, width: 30.0, height: 20.0,
                    confidence: 0.5, font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
            ];

            let result = OcrProcessingResult {
                text: "Perfect Zero Mid".to_string(),
                confidence: 0.5,
                fragments,
                processing_time_ms: 50,
                engine_name: "Test".to_string(),
                language: "en".to_string(),
                image_dimensions: (200, 200),
            };

            assert_eq!(result.filter_by_confidence(0.0).len(), 3);
            assert_eq!(result.filter_by_confidence(0.5).len(), 2);
            assert_eq!(result.filter_by_confidence(1.0).len(), 1);
            assert_eq!(result.filter_by_confidence(1.1).len(), 0);
        }

        #[test]
        fn test_ocr_result_fragment_type_combinations() {
            let fragments = vec![
                OcrTextFragment {
                    text: "A".to_string(),
                    x: 0.0, y: 0.0, width: 10.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Character,
                },
                OcrTextFragment {
                    text: "Word".to_string(),
                    x: 20.0, y: 0.0, width: 40.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "Line of text".to_string(),
                    x: 0.0, y: 25.0, width: 100.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Line,
                },
                OcrTextFragment {
                    text: "Paragraph text...".to_string(),
                    x: 0.0, y: 50.0, width: 200.0, height: 100.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Paragraph,
                },
            ];

            let result = OcrProcessingResult {
                text: "Combined".to_string(),
                confidence: 0.9,
                fragments,
                processing_time_ms: 50,
                engine_name: "Test".to_string(),
                language: "en".to_string(),
                image_dimensions: (300, 300),
            };

            assert_eq!(result.fragments_of_type(FragmentType::Character).len(), 1);
            assert_eq!(result.fragments_of_type(FragmentType::Word).len(), 1);
            assert_eq!(result.fragments_of_type(FragmentType::Line).len(), 1);
            assert_eq!(result.fragments_of_type(FragmentType::Paragraph).len(), 1);
        }

        #[test]
        fn test_ocr_result_large_fragment_set() {
            let mut fragments = Vec::new();
            for i in 0..1000 {
                fragments.push(OcrTextFragment {
                    text: format!("Fragment{}", i),
                    x: (i % 10) as f64 * 50.0,
                    y: (i / 10) as f64 * 20.0,
                    width: 45.0,
                    height: 18.0,
                    confidence: 0.5 + (i as f64 % 50.0) / 100.0,
                    font_size: 12.0,
                    fragment_type: if i % 4 == 0 {
                        FragmentType::Line
                    } else {
                        FragmentType::Word
                    },
                });
            }

            let result = OcrProcessingResult {
                text: "Large document".to_string(),
                confidence: 0.75,
                fragments,
                processing_time_ms: 500,
                engine_name: "Test".to_string(),
                language: "en".to_string(),
                image_dimensions: (500, 2000),
            };

            let high_conf = result.filter_by_confidence(0.8);
            assert!(high_conf.len() < 1000);

            // Every fourth fragment is a line, so 250 of the 1000 fragments match.
            let lines = result.fragments_of_type(FragmentType::Line);
            assert_eq!(lines.len(), 250);

            let region = result.fragments_in_region(0.0, 0.0, 200.0, 200.0);
            assert!(region.len() > 0);

            let avg = result.average_confidence();
            assert!(avg > 0.5 && avg < 1.0);
        }

        #[test]
        fn test_ocr_result_empty_handling() {
            let result = OcrProcessingResult {
                text: String::new(),
                confidence: 0.0,
                fragments: vec![],
                processing_time_ms: 10,
                engine_name: "Test".to_string(),
                language: "en".to_string(),
                image_dimensions: (0, 0),
            };

            assert_eq!(result.filter_by_confidence(0.5).len(), 0);
            assert_eq!(result.fragments_in_region(0.0, 0.0, 100.0, 100.0).len(), 0);
            assert_eq!(result.fragments_of_type(FragmentType::Word).len(), 0);
            assert_eq!(result.average_confidence(), 0.0);
        }

        #[test]
        fn test_mock_provider_configuration_mutations() {
            let mut provider = MockOcrProvider::new();

            provider.set_mock_text("Custom mock text".to_string());

            provider.set_confidence(0.95);

            provider.set_processing_delay(200);

            let options = OcrOptions::default();
            let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];

            let result = provider.process_image(&jpeg_data, &options).unwrap();
            assert!(result.text.contains("Custom mock text"));
            assert_eq!(result.confidence, 0.95);
            assert_eq!(result.processing_time_ms, 200);
        }

        #[test]
        fn test_mock_provider_confidence_clamping() {
            let mut provider = MockOcrProvider::new();

            provider.set_confidence(1.5);
            assert_eq!(provider.confidence, 1.0);

            provider.set_confidence(-0.5);
            assert_eq!(provider.confidence, 0.0);

            provider.set_confidence(0.75);
            assert_eq!(provider.confidence, 0.75);
        }

        #[test]
        fn test_mock_provider_validate_png() {
            let provider = MockOcrProvider::new();

            let png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
            assert!(provider.validate_image_data(&png_data).is_ok());

            let bad_png = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0B];
            assert!(provider.validate_image_data(&bad_png).is_err());
        }

        #[test]
        fn test_mock_provider_validate_tiff() {
            let provider = MockOcrProvider::new();

            let tiff_le = vec![0x49, 0x49, 0x2A, 0x00, 0x00, 0x00, 0x00, 0x00];
            assert!(provider.validate_image_data(&tiff_le).is_ok());

            let tiff_be = vec![0x4D, 0x4D, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x00];
            assert!(provider.validate_image_data(&tiff_be).is_ok());
        }

        #[test]
        fn test_mock_provider_process_page() {
            let provider = MockOcrProvider::new();
            let options = OcrOptions::default();

            let analysis = ContentAnalysis {
                page_number: 0,
                page_type: crate::operations::page_analysis::PageType::Scanned,
                text_ratio: 0.0,
                image_ratio: 1.0,
                blank_space_ratio: 0.0,
                text_fragment_count: 0,
                image_count: 1,
                character_count: 0,
            };

            let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];

            let result = provider.process_page(&analysis, &jpeg_data, &options).unwrap();
            assert!(result.text.contains("Mock OCR"));
        }

        #[test]
        fn test_mock_provider_thread_safety() {
            use std::sync::Arc;
            use std::thread;

            let provider = Arc::new(MockOcrProvider::new());
            let options = Arc::new(OcrOptions::default());

            let mut handles = vec![];

            for i in 0..5 {
                let provider_clone = Arc::clone(&provider);
                let options_clone = Arc::clone(&options);

                let handle = thread::spawn(move || {
                    let jpeg_data = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
                    let result = provider_clone.process_image(&jpeg_data, &options_clone).unwrap();
                    assert!(result.text.contains("Mock OCR"));
                    i
                });

                handles.push(handle);
            }

            for handle in handles {
                let thread_id = handle.join().unwrap();
                assert!(thread_id < 5);
            }
        }

        #[test]
        fn test_ocr_engine_display() {
            assert_eq!(format!("{}", OcrEngine::Mock), "Mock OCR");
            assert_eq!(format!("{}", OcrEngine::Tesseract), "Tesseract");
            assert_eq!(format!("{}", OcrEngine::Azure), "Azure Computer Vision");
            assert_eq!(format!("{}", OcrEngine::Aws), "AWS Textract");
            assert_eq!(format!("{}", OcrEngine::GoogleCloud), "Google Cloud Vision");
        }

        #[test]
        fn test_ocr_engine_equality() {
            assert_eq!(OcrEngine::Mock, OcrEngine::Mock);
            assert_ne!(OcrEngine::Mock, OcrEngine::Tesseract);

            let engine1 = OcrEngine::Azure;
            let engine2 = engine1;
            assert_eq!(engine1, engine2);
        }

        #[test]
        fn test_ocr_engine_format_support_matrix() {
            let _engines = vec![
                OcrEngine::Mock,
                OcrEngine::Tesseract,
                OcrEngine::Azure,
                OcrEngine::Aws,
                OcrEngine::GoogleCloud,
            ];

            let formats = vec![
                ImageFormat::Jpeg,
                ImageFormat::Png,
                ImageFormat::Tiff,
            ];

            let expected = vec![
                (OcrEngine::Mock, vec![true, true, true]),
                (OcrEngine::Tesseract, vec![true, true, true]),
                (OcrEngine::Azure, vec![true, true, false]),
                (OcrEngine::Aws, vec![true, true, false]),
                (OcrEngine::GoogleCloud, vec![true, true, false]),
            ];

            for (engine, expected_support) in expected {
                for (i, format) in formats.iter().enumerate() {
                    assert_eq!(
                        engine.supports_format(*format),
                        expected_support[i],
                        "Engine {:?} format {:?} support mismatch",
                        engine,
                        format
                    );
                }
            }
        }

        #[test]
        fn test_validate_image_data_all_formats() {
            let provider = MockOcrProvider::new();

            let test_cases = vec![
                (vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46], true),
                (vec![0xFF, 0xD8, 0xFF, 0xE1, 0x00, 0x10, 0x45, 0x78], true),
                (vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], true),
                (vec![0x49, 0x49, 0x2A, 0x00, 0x00, 0x00, 0x00, 0x00], true),
                (vec![0x4D, 0x4D, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x00], true),
                (vec![0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x00, 0x00], false),
                (vec![0x42, 0x4D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], false),
                (vec![0xFF, 0xD8], false),
                (vec![], false),
            ];

            for (data, should_succeed) in test_cases {
                let result = provider.validate_image_data(&data);
                assert_eq!(
                    result.is_ok(),
                    should_succeed,
                    "Failed for data: {:?}",
                    &data[..data.len().min(8)]
                );
            }
        }

        #[test]
        fn test_ocr_options_with_all_preprocessing() {
            let options = OcrOptions {
                language: "deu+eng+fra".to_string(),
                min_confidence: 0.85,
                preserve_layout: true,
                preprocessing: ImagePreprocessing {
                    denoise: true,
                    deskew: true,
                    enhance_contrast: true,
                    sharpen: true,
                    scale_factor: 1.5,
                },
                engine_options: {
                    let mut map = HashMap::new();
                    map.insert("param1".to_string(), "value1".to_string());
                    map.insert("param2".to_string(), "value2".to_string());
                    map
                },
                timeout_seconds: 120,
            };

            assert_eq!(options.language, "deu+eng+fra");
            assert_eq!(options.min_confidence, 0.85);
            assert!(options.preserve_layout);
            assert!(options.preprocessing.denoise);
            assert!(options.preprocessing.deskew);
            assert!(options.preprocessing.enhance_contrast);
            assert!(options.preprocessing.sharpen);
            assert_eq!(options.preprocessing.scale_factor, 1.5);
            assert_eq!(options.engine_options.len(), 2);
            assert_eq!(options.timeout_seconds, 120);
        }

        #[test]
        fn test_fragment_boundary_calculations() {
            let fragments = vec![
                OcrTextFragment {
                    text: "TopLeft".to_string(),
                    x: 0.0, y: 0.0, width: 50.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
                OcrTextFragment {
                    text: "BottomRight".to_string(),
                    x: 550.0, y: 770.0, width: 60.0, height: 20.0,
                    confidence: 0.9, font_size: 12.0,
                    fragment_type: FragmentType::Word,
                },
            ];

            let min_x = fragments.iter().map(|f| f.x).fold(f64::INFINITY, f64::min);
            let min_y = fragments.iter().map(|f| f.y).fold(f64::INFINITY, f64::min);
            let max_x = fragments.iter().map(|f| f.x + f.width).fold(f64::NEG_INFINITY, f64::max);
            let max_y = fragments.iter().map(|f| f.y + f.height).fold(f64::NEG_INFINITY, f64::max);

            assert_eq!(min_x, 0.0);
            assert_eq!(min_y, 0.0);
            assert_eq!(max_x, 610.0);
            assert_eq!(max_y, 790.0);
        }

        #[test]
        fn test_error_chain_context() {
            use std::io::{Error as IoError, ErrorKind};

            let io_error = IoError::new(ErrorKind::PermissionDenied, "Access denied to image file");
            let ocr_error: OcrError = io_error.into();

            let error_chain = format!("{}", ocr_error);
            assert!(error_chain.contains("IO error"));

            let processing_error = OcrError::ProcessingFailed(
                "Failed to process page 5: insufficient memory".to_string(),
            );
            let error_msg = format!("{}", processing_error);
            assert!(error_msg.contains("page 5"));
            assert!(error_msg.contains("insufficient memory"));
        }

        #[test]
        fn test_concurrent_result_processing() {
            use std::sync::{Arc, Mutex};
            use std::thread;

            let result = Arc::new(OcrProcessingResult {
                text: "Concurrent test".to_string(),
                confidence: 0.85,
                fragments: vec![
                    OcrTextFragment {
                        text: "Fragment1".to_string(),
                        x: 0.0, y: 0.0, width: 100.0, height: 20.0,
                        confidence: 0.9, font_size: 12.0,
                        fragment_type: FragmentType::Word,
                    },
                    OcrTextFragment {
                        text: "Fragment2".to_string(),
                        x: 0.0, y: 25.0, width: 100.0, height: 20.0,
                        confidence: 0.8, font_size: 12.0,
                        fragment_type: FragmentType::Word,
                    },
                ],
                processing_time_ms: 100,
                engine_name: "Test".to_string(),
                language: "en".to_string(),
                image_dimensions: (200, 100),
            });

            let counter = Arc::new(Mutex::new(0));
            let mut handles = vec![];

            for _ in 0..10 {
                let result_clone = Arc::clone(&result);
                let counter_clone = Arc::clone(&counter);

                let handle = thread::spawn(move || {
                    let _ = result_clone.filter_by_confidence(0.85);
                    let _ = result_clone.fragments_in_region(0.0, 0.0, 200.0, 100.0);
                    let _ = result_clone.average_confidence();

                    let mut count = counter_clone.lock().unwrap();
                    *count += 1;
                });

                handles.push(handle);
            }

            for handle in handles {
                handle.join().unwrap();
            }

            assert_eq!(*counter.lock().unwrap(), 10);
        }
    }
}