Skip to main content

pdf_ocr/
engine.rs

1//! Pluggable OCR engine trait and built-in implementations.
2//!
3//! Provides the `OcrEngine` trait for integrating different OCR backends,
4//! and a `NoOpEngine` for testing.
5
/// A single word recognized by OCR.
///
/// Derives `PartialEq` so that words can be compared directly, for example
/// with `assert_eq!` in tests. `Eq` is not derived because `confidence` is an
/// `f32`, which does not implement `Eq`.
#[derive(Debug, Clone, PartialEq)]
pub struct OcrWord {
    /// The recognized text.
    pub text: String,
    /// Bounding box in pixel coordinates, laid out as `[x0, y0, x1, y1]`.
    pub bbox_px: [u32; 4],
    /// Recognition confidence (0.0 to 1.0). The range is a convention; the
    /// type itself does not enforce it.
    pub confidence: f32,
}
16
17/// Result of OCR processing on a single page.
18#[derive(Debug, Clone)]
19pub struct OcrPageResult {
20    /// Recognized words.
21    pub words: Vec<OcrWord>,
22    /// Overall confidence for the page.
23    pub confidence: f32,
24    /// Width of the source image in pixels.
25    pub image_width: u32,
26    /// Height of the source image in pixels.
27    pub image_height: u32,
28}
29
30impl OcrPageResult {
31    /// Get the full text of the page by joining all words with spaces.
32    pub fn full_text(&self) -> String {
33        self.words
34            .iter()
35            .map(|w| w.text.as_str())
36            .collect::<Vec<_>>()
37            .join(" ")
38    }
39}
40
41/// Trait for pluggable OCR engines.
42///
43/// Implementors provide character recognition on rasterized page images.
44pub trait OcrEngine: Send + Sync {
45    /// Recognize text in an image.
46    ///
47    /// # Arguments
48    /// * `image_data` - Raw pixel data (RGB, row-major).
49    /// * `width` - Image width in pixels.
50    /// * `height` - Image height in pixels.
51    /// * `dpi` - Resolution in dots per inch.
52    fn recognize(
53        &self,
54        image_data: &[u8],
55        width: u32,
56        height: u32,
57        dpi: u32,
58    ) -> std::result::Result<OcrPageResult, String>;
59
60    /// Return the list of supported languages.
61    fn supported_languages(&self) -> Vec<String>;
62}
63
64/// A no-op OCR engine that always returns empty results.
65///
66/// Useful for testing the pipeline without a real OCR backend.
67#[derive(Debug, Default)]
68pub struct NoOpEngine;
69
70impl OcrEngine for NoOpEngine {
71    fn recognize(
72        &self,
73        _image_data: &[u8],
74        width: u32,
75        height: u32,
76        _dpi: u32,
77    ) -> std::result::Result<OcrPageResult, String> {
78        Ok(OcrPageResult {
79            words: Vec::new(),
80            confidence: 0.0,
81            image_width: width,
82            image_height: height,
83        })
84    }
85
86    fn supported_languages(&self) -> Vec<String> {
87        Vec::new()
88    }
89}
90
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an `OcrWord` fixture from its three parts.
    fn word(text: &str, bbox_px: [u32; 4], confidence: f32) -> OcrWord {
        OcrWord {
            text: text.to_string(),
            bbox_px,
            confidence,
        }
    }

    /// `NoOpEngine` yields an empty page that echoes the requested dimensions
    /// and advertises no languages.
    #[test]
    fn noop_engine_returns_empty() {
        let engine = NoOpEngine;
        let page = engine.recognize(&[], 100, 100, 300).unwrap();

        assert!(page.words.is_empty());
        assert_eq!(page.confidence, 0.0);
        assert_eq!(page.image_width, 100);
        assert_eq!(page.image_height, 100);

        let languages = engine.supported_languages();
        assert!(languages.is_empty());
    }

    /// `full_text` joins the words in order with a single space between them.
    #[test]
    fn ocr_page_result_full_text() {
        let words = vec![
            word("Hello", [0, 0, 50, 20], 0.95),
            word("World", [60, 0, 110, 20], 0.90),
        ];
        let page = OcrPageResult {
            words,
            confidence: 0.92,
            image_width: 200,
            image_height: 100,
        };

        assert_eq!(page.full_text(), "Hello World");
    }
}