pdf_ocr/error.rs
1//! Error types for `pdf-ocr` operations.
2
3use thiserror::Error;
4
5/// Errors returned by the `pdf-ocr` crate during optical character
6/// recognition.
7///
8/// Wrapping engines (Tesseract, PaddleOCR) bubble their own errors up via
9/// the [`OcrError::Engine`] variant — this crate does not classify
10/// engine-specific failures.
11#[derive(Debug, Error)]
12pub enum OcrError {
13 /// The underlying PDF byte stream could not be parsed before OCR could
14 /// be attempted (malformed cross-reference table, truncated stream,
15 /// encrypted document opened without a password, etc.).
16 #[error("PDF error: {0}")]
17 Pdf(#[from] lopdf::Error),
18
19 /// An I/O error occurred while reading the source PDF or writing OCR
20 /// output (e.g. searchable PDF or hOCR sidecar).
21 #[error("IO error: {0}")]
22 Io(#[from] std::io::Error),
23
24 /// The caller asked to OCR a page index that does not exist in the
25 /// document.
26 ///
27 /// Fields: `(requested_page, total_pages)`. Page numbers are 1-based.
28 #[error("page {0} out of range (document has {1} pages)")]
29 PageOutOfRange(u32, u32),
30
31 /// The configured OCR engine returned a non-recoverable error
32 /// (Tesseract initialisation failure, missing language data, model
33 /// load failure, etc.). The wrapped string is the engine's verbatim
34 /// message — surface it to the user.
35 #[error("OCR engine error: {0}")]
36 Engine(String),
37
38 /// Rasterising the page for OCR failed before the engine could see
39 /// any pixels (font resolution failure, unsupported colorspace,
40 /// page geometry error, etc.).
41 #[error("render error: {0}")]
42 Render(String),
43
44 /// A non-categorised OCR failure. Reserved for cases the more specific
45 /// variants do not cover; the message describes the situation.
46 #[error("{0}")]
47 Other(String),
48}
49
50/// Convenience `Result` alias for fallible `pdf-ocr` operations.
51pub type Result<T> = std::result::Result<T, OcrError>;