pdfmuse_core/backend/mod.rs
//! Pluggable vision backend — the ML boundary.
//!
//! The core is deterministic and has **zero ML dependencies**. Everything that
//! needs a model — OCR for scanned pages, structure recognition for borderless
//! tables — goes through the [`VisionBackend`] trait, not the core. The default
//! [`NoopBackend`] does no inference: a scanned page is surfaced as a `NeedsOcr`
//! warning and left for a real backend (an ONNX/Tesseract crate, or a
//! Python-side cloud OCR) to fill in. Reference backends live in separate
//! optional crates so the core never links a model runtime.
10
11use crate::ir::{Cell, Char};
12
13/// A backend that can recover content the deterministic core cannot.
14pub trait VisionBackend: Send + Sync {
15 /// OCR a rasterized page (PNG at `dpi`) into positioned characters.
16 fn ocr_page(&self, page_png: &[u8], dpi: u32) -> Result<Vec<Char>, BackendError>;
17
18 /// Recognize the cell structure of a borderless-table region (PNG).
19 fn detect_table(&self, region_png: &[u8]) -> Result<Vec<Vec<Cell>>, BackendError>;
20}
21
/// The default backend: it performs no model inference at all.
///
/// Scanned pages surface as warnings instead of being OCR'd, which keeps the
/// core free of any ML runtime. Its [`VisionBackend`] implementation answers
/// every request with [`BackendError::Unsupported`].
///
/// This is a zero-sized unit type, so it also derives the common comparison and
/// hashing traits: every value is equal to every other value.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NoopBackend;
26
27impl VisionBackend for NoopBackend {
28 fn ocr_page(&self, _page_png: &[u8], _dpi: u32) -> Result<Vec<Char>, BackendError> {
29 Err(BackendError::Unsupported)
30 }
31
32 fn detect_table(&self, _region_png: &[u8]) -> Result<Vec<Vec<Cell>>, BackendError> {
33 Err(BackendError::Unsupported)
34 }
35}
36
37/// An error from a vision backend.
38#[derive(Debug, thiserror::Error)]
39pub enum BackendError {
40 /// This backend does not implement the requested operation (e.g. [`NoopBackend`]).
41 #[error("operation not supported by this backend")]
42 Unsupported,
43 /// The backend failed at runtime.
44 #[error("backend failure: {0}")]
45 Failed(String),
46}
47
#[cfg(test)]
mod tests {
    use super::*;

    /// The no-op backend must refuse both operations rather than fabricate
    /// output.
    #[test]
    fn noop_backend_does_no_inference() {
        let backend = NoopBackend;

        let ocr_outcome = backend.ocr_page(&[], 300);
        assert!(matches!(ocr_outcome, Err(BackendError::Unsupported)));

        let table_outcome = backend.detect_table(&[]);
        assert!(matches!(table_outcome, Err(BackendError::Unsupported)));
    }
}
58}