// pdf_ocr/error.rs

//! Error types for `pdf-ocr` operations.

use thiserror::Error;

5/// Errors returned by the `pdf-ocr` crate during optical character
6/// recognition.
7///
8/// Wrapping engines (Tesseract, PaddleOCR) bubble their own errors up via
9/// the [`OcrError::Engine`] variant — this crate does not classify
10/// engine-specific failures.
11#[derive(Debug, Error)]
12pub enum OcrError {
13    /// The underlying PDF byte stream could not be parsed before OCR could
14    /// be attempted (malformed cross-reference table, truncated stream,
15    /// encrypted document opened without a password, etc.).
16    #[error("PDF error: {0}")]
17    Pdf(#[from] lopdf::Error),
18
19    /// An I/O error occurred while reading the source PDF or writing OCR
20    /// output (e.g. searchable PDF or hOCR sidecar).
21    #[error("IO error: {0}")]
22    Io(#[from] std::io::Error),
23
24    /// The caller asked to OCR a page index that does not exist in the
25    /// document.
26    ///
27    /// Fields: `(requested_page, total_pages)`. Page numbers are 1-based.
28    #[error("page {0} out of range (document has {1} pages)")]
29    PageOutOfRange(u32, u32),
30
31    /// The configured OCR engine returned a non-recoverable error
32    /// (Tesseract initialisation failure, missing language data, model
33    /// load failure, etc.). The wrapped string is the engine's verbatim
34    /// message — surface it to the user.
35    #[error("OCR engine error: {0}")]
36    Engine(String),
37
38    /// Rasterising the page for OCR failed before the engine could see
39    /// any pixels (font resolution failure, unsupported colorspace,
40    /// page geometry error, etc.).
41    #[error("render error: {0}")]
42    Render(String),
43
44    /// A non-categorised OCR failure. Reserved for cases the more specific
45    /// variants do not cover; the message describes the situation.
46    #[error("{0}")]
47    Other(String),
48}
49
50/// Convenience `Result` alias for fallible `pdf-ocr` operations.
51pub type Result<T> = std::result::Result<T, OcrError>;