1use super::{
6 BlockType, BoundingBox, Column, OCRConfig, OCREngine, OCREngineType, OCRResult, OCRWord,
7 TextBlock, TextLayout,
8};
9use crate::{RragError, RragResult};
10use std::collections::HashMap;
11use std::path::Path;
12
/// OCR front-end that pairs one primary backend with a list of fallbacks.
///
/// Built by `DefaultOCREngine::new` from an `OCRConfig`. Recognition is
/// attempted on the primary backend first and on the fallbacks afterwards;
/// the result can then be post-processed and analysed for layout.
pub struct DefaultOCREngine {
    /// Configuration handed to `new`; its confidence threshold and
    /// spell-correction flag are read at recognition time.
    config: OCRConfig,

    /// Backend that is tried first for every image.
    primary_engine: Box<dyn OCREngineImpl>,

    /// Backends tried, in order, when the primary fails or scores below the
    /// configured confidence threshold.
    fallback_engines: Vec<Box<dyn OCREngineImpl>>,

    /// Spell correction, language detection and text formatting stage.
    post_processor: TextPostProcessor,

    /// Groups recognised words into blocks, reading order and columns.
    layout_analyzer: OCRLayoutAnalyzer,
}
30
/// Interface every concrete OCR backend implements.
///
/// The `Send + Sync` bound lets boxed backends be shared across threads.
pub trait OCREngineImpl: Send + Sync {
    /// Runs recognition on the image at `image_path` and returns the text,
    /// per-word data, overall confidence and languages.
    fn extract_text(&self, image_path: &Path) -> RragResult<OCRResult>;

    /// Describes what this backend reports it can do.
    fn capabilities(&self) -> EngineCapabilities;

    /// Human-readable backend name.
    fn name(&self) -> &str;
}
42
/// Self-reported feature set of an OCR backend, as returned by
/// `OCREngineImpl::capabilities`.
#[derive(Debug, Clone)]
pub struct EngineCapabilities {
    /// Language codes the backend lists; spelling is backend-specific
    /// (for example `eng` for Tesseract versus `en` for EasyOCR).
    pub languages: Vec<String>,

    /// Whether the backend reports layout detection support.
    pub layout_detection: bool,

    /// Whether the backend reports confidence scores.
    pub confidence_scores: bool,

    /// Whether the backend reports word-level results.
    pub word_level: bool,

    /// Coarse speed class of the backend.
    pub speed: ProcessingSpeed,

    /// Coarse accuracy class of the backend.
    pub accuracy: AccuracyLevel,
}
64
/// Coarse speed class a backend reports in [`EngineCapabilities`].
///
/// Derives `PartialEq`/`Eq` so values can be compared with `==` and used in
/// `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessingSpeed {
    Fast,
    Medium,
    Slow,
}
72
/// Coarse accuracy class a backend reports in [`EngineCapabilities`].
///
/// Derives `PartialEq`/`Eq` so values can be compared with `==` and used in
/// `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccuracyLevel {
    Low,
    Medium,
    High,
}
80
/// Post-processing pipeline applied to a raw OCR result: optional spell
/// correction, language detection, then text formatting.
pub struct TextPostProcessor {
    /// Present only when the configuration enables spell correction.
    spell_checker: Option<SpellChecker>,

    /// Detector whose output, when non-empty, replaces the result's languages.
    language_detector: LanguageDetector,

    /// Final clean-up and whitespace handling of the recognised text.
    formatter: TextFormatter,
}
92
/// Table-driven spell corrector applied to low-confidence words.
pub struct SpellChecker {
    /// Maps a language code to a dictionary file name of the form
    /// `dict_<lang>.txt`. The names are only recorded; nothing visible in this
    /// file opens them.
    dictionaries: HashMap<String, String>,

    /// Words whose confidence is below this value are passed to the
    /// correction lookup. `new` sets it to 0.7.
    confidence_threshold: f32,
}
101
/// Script-based language detector used during post-processing.
pub struct LanguageDetector {
    /// Languages supplied at construction. `detect` does not consult this
    /// list in the visible code.
    supported_languages: Vec<String>,

    /// Set to 0.8 by `new`; not read by `detect` in the visible code.
    min_confidence: f32,
}
110
/// Formatting options applied to recognised text by `TextFormatter::format`.
pub struct TextFormatter {
    /// When `false`, newline characters are replaced with spaces.
    preserve_line_breaks: bool,

    /// When `false`, runs of whitespace are collapsed to single spaces.
    preserve_spacing: bool,

    /// When `true`, the text is passed through the artifact clean-up filter
    /// and trimmed. `new` always enables this.
    cleanup_artifacts: bool,
}
122
/// Derives blocks, reading order and columns from word bounding boxes.
pub struct OCRLayoutAnalyzer {
    /// Set to 0.1 by `new`; not read by the analysis code visible in this file.
    block_threshold: f32,

    /// When `true`, `analyze_layout` also computes columns.
    column_detection: bool,

    /// When `true`, blocks are ordered top-to-bottom then left-to-right;
    /// otherwise they keep their detection order.
    reading_order_detection: bool,
}
134
/// Tesseract backend.
///
/// NOTE(review): `extract_text` for this type returns fixed sample data and
/// does not call an OCR library in the visible code.
pub struct TesseractEngine {
    /// Languages passed to `new`; echoed back in every result.
    languages: Vec<String>,

    /// Engine mode; stored but not read by `extract_text` in this file.
    ocr_mode: TesseractOCRMode,

    /// Page segmentation mode; stored but not read by `extract_text` in this file.
    psm: PageSegmentationMode,
}
146
/// Recognition engine selection for [`TesseractEngine`].
///
/// Presumably mirrors Tesseract's OEM setting (legacy, LSTM, or both) —
/// confirm against the binding that eventually consumes it.
///
/// Derives `PartialEq`/`Eq` so values can be compared with `==` and used in
/// `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TesseractOCRMode {
    LegacyOnly,
    NeuralOnly,
    LegacyAndNeural,
}
154
/// Page segmentation strategy for [`TesseractEngine`].
///
/// Presumably mirrors Tesseract's PSM values — confirm against the binding
/// that eventually consumes it.
///
/// Derives `PartialEq`/`Eq` so values can be compared with `==` and used in
/// `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PageSegmentationMode {
    Auto,
    SingleColumn,
    SingleBlockVertText,
    SingleBlock,
    SingleLine,
    SingleWord,
    SingleCharacter,
    SparseText,
}
167
/// EasyOCR backend.
///
/// NOTE(review): `extract_text` for this type returns fixed sample data and
/// does not call an OCR library in the visible code.
pub struct EasyOCREngine {
    /// Languages passed to `new`; echoed back in every result.
    languages: Vec<String>,

    /// Set to `false` by `new`; not read by `extract_text` in this file.
    gpu_enabled: bool,

    /// Detection model name (`"craft"` by default); not read by `extract_text`.
    detection_model: String,

    /// Recognition model name (`"crnn"` by default); not read by `extract_text`.
    recognition_model: String,
}
182
/// PaddleOCR backend.
///
/// NOTE(review): `extract_text` for this type returns fixed sample data and
/// does not call an OCR library in the visible code.
pub struct PaddleOCREngine {
    /// Single language passed to `new`; echoed back in every result.
    language: String,

    /// Model precision (`FP32` by default); not read by `extract_text`.
    precision: ModelPrecision,

    /// Set to `true` by `new`; not read by `extract_text` in this file.
    direction_detection: bool,
}
194
/// Numeric precision selected for [`PaddleOCREngine`].
///
/// Derives `PartialEq`/`Eq` so values can be compared with `==` and used in
/// `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModelPrecision {
    FP16,
    FP32,
    INT8,
}
202
/// Cloud Vision backend.
///
/// NOTE(review): `extract_text` for this type returns fixed sample data; no
/// request is sent and none of the fields below are read in the visible code.
pub struct CloudVisionEngine {
    /// API credentials; `new` fills these with demo placeholders.
    credentials: CloudCredentials,

    /// Service base URL.
    endpoint: String,

    /// Request timeout in milliseconds (30000 by default).
    timeout_ms: u64,
}
214
/// Credentials for a cloud OCR service.
///
/// `Debug` is implemented by hand so that the API key is never written to
/// logs or panic messages; the other fields are printed as usual.
#[derive(Clone)]
pub struct CloudCredentials {
    /// Secret API key. Redacted in `Debug` output.
    pub api_key: String,
    /// Optional project identifier.
    pub project_id: Option<String>,
    /// Optional service region.
    pub region: Option<String>,
}

impl std::fmt::Debug for CloudCredentials {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CloudCredentials")
            // Never print the secret itself.
            .field("api_key", &"<redacted>")
            .field("project_id", &self.project_id)
            .field("region", &self.region)
            .finish()
    }
}
222
/// Quality report produced by `DefaultOCREngine::assess_quality`.
#[derive(Debug, Clone)]
pub struct OCRQuality {
    /// The result's overall confidence, copied unchanged.
    pub overall_confidence: f32,

    /// Mean word confidence minus a penalty for very short words, floored at 0.
    pub text_quality: f32,

    /// Layout score; `assess_quality` currently fills in a fixed 0.8.
    pub layout_quality: f32,

    /// Language score; `assess_quality` currently fills in a fixed 0.9.
    pub language_confidence: f32,

    /// Problems found while assessing the result.
    pub issues: Vec<QualityIssue>,
}
241
/// A single problem detected in an OCR result.
#[derive(Debug, Clone)]
pub struct QualityIssue {
    /// Category of the problem.
    pub issue_type: OCRIssueType,

    /// Human-readable explanation.
    pub description: String,

    /// How serious the problem is.
    pub severity: IssueSeverity,

    /// Region the problem applies to, or `None` when it concerns the whole result.
    pub location: Option<BoundingBox>,

    /// Optional remediation hint.
    pub suggested_fix: Option<String>,
}
260
/// Category of a [`QualityIssue`].
///
/// Derives `PartialEq`/`Eq` so callers can filter or assert on issue types
/// with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OCRIssueType {
    LowConfidence,
    PoorImageQuality,
    UnsupportedLanguage,
    LayoutComplexity,
    FontIssues,
    SkewedText,
    NoiseArtifacts,
}
272
/// Severity attached to a [`QualityIssue`].
///
/// Derives `PartialEq`/`Eq` so callers can filter or assert on severities
/// with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IssueSeverity {
    Low,
    Medium,
    High,
    Critical,
}
281
282impl DefaultOCREngine {
283 pub fn new(config: OCRConfig) -> RragResult<Self> {
285 let primary_engine = Self::create_engine(config.engine, &config)?;
286 let fallback_engines = Self::create_fallback_engines(&config)?;
287 let post_processor = TextPostProcessor::new(&config)?;
288 let layout_analyzer = OCRLayoutAnalyzer::new();
289
290 Ok(Self {
291 config,
292 primary_engine,
293 fallback_engines,
294 post_processor,
295 layout_analyzer,
296 })
297 }
298
299 fn create_engine(
301 engine_type: OCREngineType,
302 config: &OCRConfig,
303 ) -> RragResult<Box<dyn OCREngineImpl>> {
304 match engine_type {
305 OCREngineType::Tesseract => {
306 Ok(Box::new(TesseractEngine::new(config.languages.clone())?))
307 }
308 OCREngineType::EasyOCR => Ok(Box::new(EasyOCREngine::new(config.languages.clone())?)),
309 OCREngineType::PaddleOCR => {
310 let lang = config
311 .languages
312 .first()
313 .unwrap_or(&"en".to_string())
314 .clone();
315 Ok(Box::new(PaddleOCREngine::new(lang)?))
316 }
317 OCREngineType::CloudVision => Ok(Box::new(CloudVisionEngine::new()?)),
318 }
319 }
320
321 fn create_fallback_engines(config: &OCRConfig) -> RragResult<Vec<Box<dyn OCREngineImpl>>> {
323 let mut engines = Vec::new();
324
325 if config.engine != OCREngineType::Tesseract {
327 engines
328 .push(Box::new(TesseractEngine::new(config.languages.clone())?)
329 as Box<dyn OCREngineImpl>);
330 }
331
332 if config.engine != OCREngineType::EasyOCR {
334 engines
335 .push(Box::new(EasyOCREngine::new(config.languages.clone())?)
336 as Box<dyn OCREngineImpl>);
337 }
338
339 Ok(engines)
340 }
341
342 pub fn ocr_with_fallback(&self, image_path: &Path) -> RragResult<OCRResult> {
344 match self.primary_engine.extract_text(image_path) {
346 Ok(result) if result.confidence >= self.config.confidence_threshold => {
347 return Ok(result);
348 }
349 Ok(primary_result) => {
350 for fallback in &self.fallback_engines {
352 if let Ok(fallback_result) = fallback.extract_text(image_path) {
353 if fallback_result.confidence > primary_result.confidence {
354 return Ok(fallback_result);
355 }
356 }
357 }
358 Ok(primary_result)
360 }
361 Err(_) => {
362 for fallback in &self.fallback_engines {
364 if let Ok(result) = fallback.extract_text(image_path) {
365 return Ok(result);
366 }
367 }
368 Err(RragError::document_processing("All OCR engines failed"))
369 }
370 }
371 }
372
373 pub fn assess_quality(&self, result: &OCRResult) -> OCRQuality {
375 let mut issues = Vec::new();
376
377 if result.confidence < 0.7 {
379 issues.push(QualityIssue {
380 issue_type: OCRIssueType::LowConfidence,
381 description: format!("Overall confidence is low: {:.2}", result.confidence),
382 severity: if result.confidence < 0.5 {
383 IssueSeverity::High
384 } else {
385 IssueSeverity::Medium
386 },
387 location: None,
388 suggested_fix: Some(
389 "Consider using a higher resolution image or different OCR engine".to_string(),
390 ),
391 });
392 }
393
394 let low_confidence_words = result.words.iter().filter(|w| w.confidence < 0.5).count();
396
397 if low_confidence_words > result.words.len() / 4 {
398 issues.push(QualityIssue {
399 issue_type: OCRIssueType::LowConfidence,
400 description: format!("{} words have low confidence", low_confidence_words),
401 severity: IssueSeverity::Medium,
402 location: None,
403 suggested_fix: Some(
404 "Manual review recommended for low-confidence words".to_string(),
405 ),
406 });
407 }
408
409 OCRQuality {
410 overall_confidence: result.confidence,
411 text_quality: self.calculate_text_quality(result),
412 layout_quality: 0.8, language_confidence: 0.9, issues,
415 }
416 }
417
418 fn calculate_text_quality(&self, result: &OCRResult) -> f32 {
420 if result.words.is_empty() {
421 return 0.0;
422 }
423
424 let avg_confidence =
426 result.words.iter().map(|w| w.confidence).sum::<f32>() / result.words.len() as f32;
427
428 let short_words = result.words.iter().filter(|w| w.text.len() <= 2).count();
430 let short_word_penalty = (short_words as f32 / result.words.len() as f32) * 0.2;
431
432 (avg_confidence - short_word_penalty).max(0.0)
433 }
434}
435
436impl OCREngine for DefaultOCREngine {
437 fn ocr(&self, image_path: &Path) -> RragResult<OCRResult> {
438 let mut result = self.ocr_with_fallback(image_path)?;
439
440 if self.config.spell_correction {
442 result = self.post_processor.process(result)?;
443 }
444
445 Ok(result)
446 }
447
448 fn get_text_with_confidence(&self, image_path: &Path) -> RragResult<Vec<(String, f32)>> {
449 let result = self.ocr(image_path)?;
450 Ok(result
451 .words
452 .into_iter()
453 .map(|word| (word.text, word.confidence))
454 .collect())
455 }
456
457 fn get_layout(&self, image_path: &Path) -> RragResult<TextLayout> {
458 let result = self.ocr(image_path)?;
459 self.layout_analyzer.analyze_layout(&result)
460 }
461}
462
463impl TesseractEngine {
464 pub fn new(languages: Vec<String>) -> RragResult<Self> {
466 Ok(Self {
467 languages,
468 ocr_mode: TesseractOCRMode::LegacyAndNeural,
469 psm: PageSegmentationMode::Auto,
470 })
471 }
472}
473
474impl OCREngineImpl for TesseractEngine {
475 fn extract_text(&self, image_path: &Path) -> RragResult<OCRResult> {
476 let text = format!(
478 "Sample text extracted from {:?}",
479 image_path.file_name().unwrap_or_default()
480 );
481
482 let words = vec![
483 OCRWord {
484 text: "Sample".to_string(),
485 confidence: 0.95,
486 bounding_box: BoundingBox {
487 x: 10,
488 y: 10,
489 width: 50,
490 height: 20,
491 },
492 },
493 OCRWord {
494 text: "text".to_string(),
495 confidence: 0.90,
496 bounding_box: BoundingBox {
497 x: 65,
498 y: 10,
499 width: 30,
500 height: 20,
501 },
502 },
503 ];
504
505 Ok(OCRResult {
506 text,
507 confidence: 0.925,
508 words,
509 languages: self.languages.clone(),
510 })
511 }
512
513 fn capabilities(&self) -> EngineCapabilities {
514 EngineCapabilities {
515 languages: vec!["eng", "fra", "deu", "spa", "chi_sim"]
516 .iter()
517 .map(|s| s.to_string())
518 .collect(),
519 layout_detection: true,
520 confidence_scores: true,
521 word_level: true,
522 speed: ProcessingSpeed::Medium,
523 accuracy: AccuracyLevel::High,
524 }
525 }
526
527 fn name(&self) -> &str {
528 "Tesseract"
529 }
530}
531
532impl EasyOCREngine {
533 pub fn new(languages: Vec<String>) -> RragResult<Self> {
535 Ok(Self {
536 languages,
537 gpu_enabled: false,
538 detection_model: "craft".to_string(),
539 recognition_model: "crnn".to_string(),
540 })
541 }
542}
543
544impl OCREngineImpl for EasyOCREngine {
545 fn extract_text(&self, image_path: &Path) -> RragResult<OCRResult> {
546 let text = format!(
548 "EasyOCR extracted text from {:?}",
549 image_path.file_name().unwrap_or_default()
550 );
551
552 let words = vec![
553 OCRWord {
554 text: "EasyOCR".to_string(),
555 confidence: 0.88,
556 bounding_box: BoundingBox {
557 x: 5,
558 y: 5,
559 width: 60,
560 height: 25,
561 },
562 },
563 OCRWord {
564 text: "extracted".to_string(),
565 confidence: 0.92,
566 bounding_box: BoundingBox {
567 x: 70,
568 y: 5,
569 width: 70,
570 height: 25,
571 },
572 },
573 ];
574
575 Ok(OCRResult {
576 text,
577 confidence: 0.90,
578 words,
579 languages: self.languages.clone(),
580 })
581 }
582
583 fn capabilities(&self) -> EngineCapabilities {
584 EngineCapabilities {
585 languages: vec!["en", "ch_sim", "ch_tra", "ja", "ko", "fr", "de"]
586 .iter()
587 .map(|s| s.to_string())
588 .collect(),
589 layout_detection: true,
590 confidence_scores: true,
591 word_level: true,
592 speed: ProcessingSpeed::Fast,
593 accuracy: AccuracyLevel::Medium,
594 }
595 }
596
597 fn name(&self) -> &str {
598 "EasyOCR"
599 }
600}
601
602impl PaddleOCREngine {
603 pub fn new(language: String) -> RragResult<Self> {
605 Ok(Self {
606 language,
607 precision: ModelPrecision::FP32,
608 direction_detection: true,
609 })
610 }
611}
612
613impl OCREngineImpl for PaddleOCREngine {
614 fn extract_text(&self, image_path: &Path) -> RragResult<OCRResult> {
615 let text = format!(
617 "PaddleOCR text from {:?}",
618 image_path.file_name().unwrap_or_default()
619 );
620
621 let words = vec![OCRWord {
622 text: "PaddleOCR".to_string(),
623 confidence: 0.93,
624 bounding_box: BoundingBox {
625 x: 8,
626 y: 8,
627 width: 80,
628 height: 22,
629 },
630 }];
631
632 Ok(OCRResult {
633 text,
634 confidence: 0.93,
635 words,
636 languages: vec![self.language.clone()],
637 })
638 }
639
640 fn capabilities(&self) -> EngineCapabilities {
641 EngineCapabilities {
642 languages: vec!["ch", "en", "fr", "german", "japan", "korean"]
643 .iter()
644 .map(|s| s.to_string())
645 .collect(),
646 layout_detection: true,
647 confidence_scores: true,
648 word_level: true,
649 speed: ProcessingSpeed::Fast,
650 accuracy: AccuracyLevel::High,
651 }
652 }
653
654 fn name(&self) -> &str {
655 "PaddleOCR"
656 }
657}
658
659impl CloudVisionEngine {
660 pub fn new() -> RragResult<Self> {
662 Ok(Self {
663 credentials: CloudCredentials {
664 api_key: "demo_key".to_string(),
665 project_id: Some("demo_project".to_string()),
666 region: Some("us-central1".to_string()),
667 },
668 endpoint: "https://vision.googleapis.com".to_string(),
669 timeout_ms: 30000,
670 })
671 }
672}
673
674impl OCREngineImpl for CloudVisionEngine {
675 fn extract_text(&self, image_path: &Path) -> RragResult<OCRResult> {
676 let text = format!(
678 "Cloud Vision text from {:?}",
679 image_path.file_name().unwrap_or_default()
680 );
681
682 let words = vec![
683 OCRWord {
684 text: "Cloud".to_string(),
685 confidence: 0.98,
686 bounding_box: BoundingBox {
687 x: 12,
688 y: 12,
689 width: 45,
690 height: 18,
691 },
692 },
693 OCRWord {
694 text: "Vision".to_string(),
695 confidence: 0.97,
696 bounding_box: BoundingBox {
697 x: 60,
698 y: 12,
699 width: 50,
700 height: 18,
701 },
702 },
703 ];
704
705 Ok(OCRResult {
706 text,
707 confidence: 0.975,
708 words,
709 languages: vec!["en".to_string()],
710 })
711 }
712
713 fn capabilities(&self) -> EngineCapabilities {
714 EngineCapabilities {
715 languages: vec!["en", "zh", "ja", "ko", "hi", "ar", "fr", "de", "es", "pt"]
716 .iter()
717 .map(|s| s.to_string())
718 .collect(),
719 layout_detection: true,
720 confidence_scores: true,
721 word_level: true,
722 speed: ProcessingSpeed::Slow, accuracy: AccuracyLevel::High,
724 }
725 }
726
727 fn name(&self) -> &str {
728 "Cloud Vision"
729 }
730}
731
732impl TextPostProcessor {
733 pub fn new(config: &OCRConfig) -> RragResult<Self> {
735 let spell_checker = if config.spell_correction {
736 Some(SpellChecker::new(&config.languages)?)
737 } else {
738 None
739 };
740
741 let language_detector = LanguageDetector::new(config.languages.clone());
742 let formatter = TextFormatter::new(config.preserve_formatting);
743
744 Ok(Self {
745 spell_checker,
746 language_detector,
747 formatter,
748 })
749 }
750
751 pub fn process(&self, mut result: OCRResult) -> RragResult<OCRResult> {
753 if let Some(ref checker) = self.spell_checker {
755 result = checker.correct(result)?;
756 }
757
758 let detected_languages = self.language_detector.detect(&result.text)?;
760 if !detected_languages.is_empty() {
761 result.languages = detected_languages;
762 }
763
764 result = self.formatter.format(result)?;
766
767 Ok(result)
768 }
769}
770
771impl SpellChecker {
772 pub fn new(languages: &[String]) -> RragResult<Self> {
774 let mut dictionaries = HashMap::new();
775 for lang in languages {
776 dictionaries.insert(lang.clone(), format!("dict_{}.txt", lang));
777 }
778
779 Ok(Self {
780 dictionaries,
781 confidence_threshold: 0.7,
782 })
783 }
784
785 pub fn correct(&self, mut result: OCRResult) -> RragResult<OCRResult> {
787 for word in &mut result.words {
789 if word.confidence < self.confidence_threshold {
790 word.text = self.suggest_correction(&word.text);
791 word.confidence = (word.confidence + 0.1).min(1.0);
792 }
793 }
794
795 result.text = result
797 .words
798 .iter()
799 .map(|w| w.text.clone())
800 .collect::<Vec<_>>()
801 .join(" ");
802
803 Ok(result)
804 }
805
806 fn suggest_correction(&self, word: &str) -> String {
808 match word.to_lowercase().as_str() {
810 "teh" => "the".to_string(),
811 "adn" => "and".to_string(),
812 "taht" => "that".to_string(),
813 _ => word.to_string(),
814 }
815 }
816}
817
818impl LanguageDetector {
819 pub fn new(supported_languages: Vec<String>) -> Self {
821 Self {
822 supported_languages,
823 min_confidence: 0.8,
824 }
825 }
826
827 pub fn detect(&self, text: &str) -> RragResult<Vec<String>> {
829 if text.chars().any(|c| c as u32 > 127) {
831 if text.chars().any(|c| '\u{4e00}' <= c && c <= '\u{9fff}') {
833 Ok(vec!["zh".to_string()])
834 } else if text.chars().any(|c| '\u{3040}' <= c && c <= '\u{309f}') {
835 Ok(vec!["ja".to_string()])
836 } else {
837 Ok(vec!["en".to_string()]) }
839 } else {
840 Ok(vec!["en".to_string()])
841 }
842 }
843}
844
845impl TextFormatter {
846 pub fn new(preserve_formatting: bool) -> Self {
848 Self {
849 preserve_line_breaks: preserve_formatting,
850 preserve_spacing: preserve_formatting,
851 cleanup_artifacts: true,
852 }
853 }
854
855 pub fn format(&self, mut result: OCRResult) -> RragResult<OCRResult> {
857 if self.cleanup_artifacts {
858 result.text = self.cleanup_text(&result.text);
859 }
860
861 if !self.preserve_spacing {
862 result.text = self.normalize_spacing(&result.text);
863 }
864
865 if !self.preserve_line_breaks {
866 result.text = result.text.replace('\n', " ");
867 }
868
869 Ok(result)
870 }
871
872 fn cleanup_text(&self, text: &str) -> String {
874 text.chars()
875 .filter(|&c| c.is_ascii_graphic() || c.is_whitespace())
876 .collect::<String>()
877 .trim()
878 .to_string()
879 }
880
881 fn normalize_spacing(&self, text: &str) -> String {
883 text.split_whitespace().collect::<Vec<_>>().join(" ")
884 }
885}
886
887impl OCRLayoutAnalyzer {
888 pub fn new() -> Self {
890 Self {
891 block_threshold: 0.1,
892 column_detection: true,
893 reading_order_detection: true,
894 }
895 }
896
897 pub fn analyze_layout(&self, result: &OCRResult) -> RragResult<TextLayout> {
899 let blocks = self.detect_blocks(result)?;
900 let reading_order = self.determine_reading_order(&blocks)?;
901 let columns = if self.column_detection {
902 Some(self.detect_columns(&blocks)?)
903 } else {
904 None
905 };
906
907 Ok(TextLayout {
908 blocks,
909 reading_order,
910 columns,
911 })
912 }
913
914 fn detect_blocks(&self, result: &OCRResult) -> RragResult<Vec<TextBlock>> {
916 let mut blocks = Vec::new();
917
918 let mut current_block_words = Vec::new();
920 let mut current_y = 0u32;
921
922 for word in &result.words {
923 if current_block_words.is_empty()
924 || (word.bounding_box.y as i32 - current_y as i32).abs() < 10
925 {
926 current_block_words.push(word);
927 current_y = word.bounding_box.y;
928 } else {
929 if !current_block_words.is_empty() {
931 blocks.push(self.create_block_from_words(¤t_block_words, blocks.len()));
932 }
933 current_block_words = vec![word];
934 current_y = word.bounding_box.y;
935 }
936 }
937
938 if !current_block_words.is_empty() {
940 blocks.push(self.create_block_from_words(¤t_block_words, blocks.len()));
941 }
942
943 Ok(blocks)
944 }
945
946 fn create_block_from_words(&self, words: &[&OCRWord], id: usize) -> TextBlock {
948 let text = words
949 .iter()
950 .map(|w| w.text.as_str())
951 .collect::<Vec<_>>()
952 .join(" ");
953
954 let min_x = words.iter().map(|w| w.bounding_box.x).min().unwrap_or(0);
956 let min_y = words.iter().map(|w| w.bounding_box.y).min().unwrap_or(0);
957 let max_x = words
958 .iter()
959 .map(|w| w.bounding_box.x + w.bounding_box.width)
960 .max()
961 .unwrap_or(0);
962 let max_y = words
963 .iter()
964 .map(|w| w.bounding_box.y + w.bounding_box.height)
965 .max()
966 .unwrap_or(0);
967
968 let bounding_box = BoundingBox {
969 x: min_x,
970 y: min_y,
971 width: max_x - min_x,
972 height: max_y - min_y,
973 };
974
975 let block_type = if text.len() < 20 && words.len() <= 3 {
977 BlockType::Title
978 } else if text.ends_with(':') {
979 BlockType::Heading
980 } else {
981 BlockType::Paragraph
982 };
983
984 TextBlock {
985 id,
986 text,
987 bounding_box,
988 block_type,
989 }
990 }
991
992 fn determine_reading_order(&self, blocks: &[TextBlock]) -> RragResult<Vec<usize>> {
994 if !self.reading_order_detection {
995 return Ok((0..blocks.len()).collect());
996 }
997
998 let mut indexed_blocks: Vec<(usize, &TextBlock)> = blocks.iter().enumerate().collect();
1000 indexed_blocks.sort_by(|a, b| {
1001 a.1.bounding_box
1002 .y
1003 .cmp(&b.1.bounding_box.y)
1004 .then_with(|| a.1.bounding_box.x.cmp(&b.1.bounding_box.x))
1005 });
1006
1007 Ok(indexed_blocks.into_iter().map(|(idx, _)| idx).collect())
1008 }
1009
1010 fn detect_columns(&self, blocks: &[TextBlock]) -> RragResult<Vec<Column>> {
1012 let mut columns = Vec::new();
1014
1015 if blocks.is_empty() {
1016 return Ok(columns);
1017 }
1018
1019 let mut x_groups: std::collections::HashMap<u32, Vec<usize>> =
1021 std::collections::HashMap::new();
1022
1023 for (idx, block) in blocks.iter().enumerate() {
1024 let x_group = (block.bounding_box.x / 100) * 100; x_groups.entry(x_group).or_insert_with(Vec::new).push(idx);
1026 }
1027
1028 for (_x_pos, block_indices) in x_groups {
1030 columns.push(Column {
1031 index: columns.len(),
1032 blocks: block_indices,
1033 width: 100, });
1035 }
1036
1037 columns.sort_by_key(|c| c.index);
1039
1040 Ok(columns)
1041 }
1042}
1043
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration yields a working engine with the expected
    /// threshold and spell correction enabled.
    #[test]
    fn test_ocr_engine_creation() {
        let config = OCRConfig::default();
        let engine = DefaultOCREngine::new(config).unwrap();

        assert_eq!(engine.config.confidence_threshold, 0.7);
        assert!(engine.config.spell_correction);
    }

    /// Tesseract reports its capabilities and name.
    #[test]
    fn test_tesseract_engine() {
        let engine = TesseractEngine::new(vec!["eng".to_string()]).unwrap();
        let capabilities = engine.capabilities();

        assert!(capabilities.confidence_scores);
        assert!(capabilities.layout_detection);
        assert_eq!(engine.name(), "Tesseract");
    }

    /// A known typo is mapped to its correction.
    #[test]
    fn test_spell_checker() {
        let checker = SpellChecker::new(&["en".to_string()]).unwrap();
        let correction = checker.suggest_correction("teh");
        assert_eq!(correction, "the");
    }

    /// ASCII text is detected as English and ideographs as Chinese.
    #[test]
    fn test_language_detector() {
        let detector = LanguageDetector::new(vec!["en".to_string(), "zh".to_string()]);

        let english_result = detector.detect("Hello world").unwrap();
        assert_eq!(english_result, vec!["en"]);

        let chinese_result = detector.detect("你好世界").unwrap();
        assert_eq!(chinese_result, vec!["zh"]);
    }

    /// With formatting not preserved, whitespace and line breaks collapse to
    /// single spaces and the text is trimmed.
    #[test]
    fn test_text_formatter() {
        let formatter = TextFormatter::new(false);

        let result = OCRResult {
            text: "  Hello   world  \n  test  ".to_string(),
            confidence: 0.9,
            words: vec![],
            languages: vec!["en".to_string()],
        };

        let formatted = formatter.format(result).unwrap();
        assert_eq!(formatted.text, "Hello world test");
    }

    /// Two words on the same line produce at least one block and a reading order.
    #[test]
    fn test_layout_analysis() {
        let analyzer = OCRLayoutAnalyzer::new();

        let result = OCRResult {
            text: "Sample text".to_string(),
            confidence: 0.9,
            words: vec![
                OCRWord {
                    text: "Sample".to_string(),
                    confidence: 0.9,
                    bounding_box: BoundingBox {
                        x: 10,
                        y: 10,
                        width: 50,
                        height: 20,
                    },
                },
                OCRWord {
                    text: "text".to_string(),
                    confidence: 0.9,
                    bounding_box: BoundingBox {
                        x: 65,
                        y: 10,
                        width: 30,
                        height: 20,
                    },
                },
            ],
            languages: vec!["en".to_string()],
        };

        let layout = analyzer.analyze_layout(&result).unwrap();
        assert!(!layout.blocks.is_empty());
        assert!(!layout.reading_order.is_empty());
    }
}