Skip to main content

spdf_ocr/
engine.rs

1use serde::{Deserialize, Serialize};
2use spdf_types::SpdfResult;
3
/// OCR invocation options.
///
/// A borrowed `OcrOptions` is passed to [`OcrEngine::recognize`] and
/// [`OcrEngine::recognize_batch`]; the default batch implementation reuses the
/// same value for every image in the batch.
#[derive(Debug, Clone)]
pub struct OcrOptions {
    /// Language code(s). HTTP servers use ISO 639-1 (`"en"`); Tesseract uses
    /// ISO 639-3 (`"eng"`). The caller chooses the right form; nothing in this
    /// type converts between the two.
    pub languages: Vec<String>,
    /// Rotation-correction switch; `false` in the `Default` impl.
    ///
    /// NOTE(review): the exact effect is engine-specific and not visible in
    /// this file — presumably asks the engine to detect and undo page
    /// rotation before recognition. Confirm against the engine impls.
    pub correct_rotation: bool,
    /// Render DPI the input image was produced at. Used by Tesseract to size
    /// characters correctly. `None` leaves it to the engine's heuristics.
    pub dpi: Option<u32>,
}
15
16impl Default for OcrOptions {
17    fn default() -> Self {
18        Self {
19            languages: vec!["en".into()],
20            correct_rotation: false,
21            dpi: None,
22        }
23    }
24}
25
/// One detected text region.
///
/// Returned in a `Vec` by [`OcrEngine::recognize`], one entry per region the
/// engine reports for an image. Derives serde's `Serialize`/`Deserialize`
/// with no field attributes, so it round-trips using the field names below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcrResult {
    /// Recognized text for this region.
    pub text: String,
    /// `[x1, y1, x2, y2]` in image pixels.
    ///
    /// NOTE(review): presumably (x1, y1) is the top-left and (x2, y2) the
    /// bottom-right corner — the corner order is not enforced here; confirm
    /// per engine.
    pub bbox: [f64; 4],
    /// Engine-reported confidence for `text`.
    ///
    /// NOTE(review): the scale (e.g. 0–1 vs 0–100) is not established in this
    /// file — confirm per engine before comparing across engines.
    pub confidence: f64,
}
34
35/// OCR engine contract. Mirrors `OcrEngine` in
36/// `liteparse/src/engines/ocr/interface.ts`.
37pub trait OcrEngine: Send + Sync {
38    fn name(&self) -> &'static str;
39
40    /// Run OCR on an image (PNG/JPEG bytes).
41    fn recognize(&self, image: &[u8], options: &OcrOptions) -> SpdfResult<Vec<OcrResult>>;
42
43    /// Default batch impl delegates to `recognize` sequentially. Engines with
44    /// real batch APIs (e.g. a remote server that accepts a JSON array) should
45    /// override for a meaningful throughput win.
46    fn recognize_batch(
47        &self,
48        images: &[&[u8]],
49        options: &OcrOptions,
50    ) -> SpdfResult<Vec<Vec<OcrResult>>> {
51        images
52            .iter()
53            .map(|img| self.recognize(img, options))
54            .collect()
55    }
56}