Skip to main content

pdfmuse_core/backend/
mod.rs

//! Pluggable vision backend — the ML boundary.
//!
//! The core is deterministic and has **zero ML dependencies**. Everything that
//! needs a model — OCR for scanned pages, structure recognition for borderless
//! tables — goes through this trait, not the core. The default [`NoopBackend`]
//! does no inference: a scanned page is surfaced as a `NeedsOcr` warning and left
//! for a real backend (an ONNX/Tesseract crate, or a Python-side cloud OCR) to
//! fill in. Reference backends live in separate optional crates so the core never
//! links a model runtime.
10
11use crate::ir::{Cell, Char};
12
13/// A backend that can recover content the deterministic core cannot.
14pub trait VisionBackend: Send + Sync {
15    /// OCR a rasterized page (PNG at `dpi`) into positioned characters.
16    fn ocr_page(&self, page_png: &[u8], dpi: u32) -> Result<Vec<Char>, BackendError>;
17
18    /// Recognize the cell structure of a borderless-table region (PNG).
19    fn detect_table(&self, region_png: &[u8]) -> Result<Vec<Vec<Cell>>, BackendError>;
20}
21
/// The default backend: performs no model inference.
///
/// Scanned pages surface as warnings instead of being OCR'd, which keeps the
/// core free of any ML runtime. This is a field-less unit struct, so it is
/// trivially `Copy` and can be created with `NoopBackend` or
/// `NoopBackend::default()`.
#[derive(Debug, Default, Clone, Copy)]
pub struct NoopBackend;
26
27impl VisionBackend for NoopBackend {
28    fn ocr_page(&self, _page_png: &[u8], _dpi: u32) -> Result<Vec<Char>, BackendError> {
29        Err(BackendError::Unsupported)
30    }
31
32    fn detect_table(&self, _region_png: &[u8]) -> Result<Vec<Vec<Cell>>, BackendError> {
33        Err(BackendError::Unsupported)
34    }
35}
36
37/// An error from a vision backend.
38#[derive(Debug, thiserror::Error)]
39pub enum BackendError {
40    /// This backend does not implement the requested operation (e.g. [`NoopBackend`]).
41    #[error("operation not supported by this backend")]
42    Unsupported,
43    /// The backend failed at runtime.
44    #[error("backend failure: {0}")]
45    Failed(String),
46}
47
#[cfg(test)]
mod tests {
    use super::*;

    /// The default backend must refuse both operations rather than fabricate
    /// output, even for empty input.
    #[test]
    fn noop_backend_does_no_inference() {
        let backend = NoopBackend;

        let ocr = backend.ocr_page(&[], 300);
        assert!(
            matches!(ocr, Err(BackendError::Unsupported)),
            "ocr_page should report Unsupported"
        );

        let table = backend.detect_table(&[]);
        assert!(
            matches!(table, Err(BackendError::Unsupported)),
            "detect_table should report Unsupported"
        );
    }
}