//! Crate root: declares the crate's modules, re-exports selected items at the
//! top level, and defines the crate-wide [`DocumentError`] type.

// `unsafe` code is rejected crate-wide, and every public item must be documented.
#![forbid(unsafe_code)]
#![deny(missing_docs)]
// When built with `--cfg docsrs`, enable the unstable `doc_cfg` feature that the
// `doc(cfg(...))` attributes below rely on.
#![cfg_attr(docsrs, feature(doc_cfg))]
// Public modules.
pub mod bundle;
pub mod extract;
pub mod layout;
// Compiled only when the `mcp` cargo feature is enabled.
#[cfg(feature = "mcp")]
#[cfg_attr(docsrs, doc(cfg(feature = "mcp")))]
pub mod mcp;
pub mod ocr;
// Private modules: not exported from the crate.
mod postprocess;
mod preprocess;
pub mod render;
// Re-exported only when the `ocr-tesseract` cargo feature is enabled.
#[cfg(feature = "ocr-tesseract")]
#[cfg_attr(docsrs, doc(cfg(feature = "ocr-tesseract")))]
pub use bundle::{clean, clean_with_ocr_backend};
// Unconditional re-exports from `bundle`, reachable directly from the crate root.
pub use bundle::{
BundleReport, ClassCount, LayoutSummary, OcrSource, PageReport, Pipeline, SafeBundle,
BUNDLE_VERSION,
};
pub use layout::ReadingOrder;
// Re-exported only when the `ocr-tesseract` cargo feature is enabled.
#[cfg(feature = "ocr-tesseract")]
#[cfg_attr(docsrs, doc(cfg(feature = "ocr-tesseract")))]
pub use ocr::TesseractBackend;
// Unconditional re-exports from `ocr`, reachable directly from the crate root.
pub use ocr::{
detect_image_format, BBox, ImageFormat, ImageInput, LanguageTag, OcrBackend, OcrError,
OcrHints, OcrSpan,
};
pub use render::Renderer;
/// Error type defined at the crate root.
///
/// Every variant renders through `Display` as a single message prefixed with
/// `gaze-document: `. The enum is `#[non_exhaustive]`, so code outside this
/// crate must include a wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Debug)]
pub enum DocumentError {
/// The `tesseract` binary was not found on `PATH`. The payload is a hint
/// that is appended to the rendered message.
TesseractNotFound(String),
/// `tesseract` exited with the given status.
TesseractFailed {
/// Exit status shown in the rendered message.
status: i32,
/// Text shown after the status in the rendered message (presumably the
/// captured standard error of the process — confirm at the construction site).
stderr: String,
},
/// The pdfium dynamic library was not found. The payload is a hint that is
/// appended to the rendered message.
PdfiumNotFound(String),
/// PDF rasterization failed. The payload is the detail text shown in the message.
PdfRasterFailed(String),
/// The input at `path` is not supported.
UnsupportedInput {
/// Path of the rejected input, shown in the rendered message.
path: std::path::PathBuf,
/// Static explanation of why the input is unsupported.
reason: &'static str,
},
/// An I/O error. Also produced by `From<std::io::Error>` and returned from
/// `source()`.
Io(std::io::Error),
/// The output directory at the given path could not be prepared. The
/// wrapped I/O error is returned from `source()`.
OutputDir(std::path::PathBuf, std::io::Error),
/// A pipeline error. The payload is the detail text shown in the message.
Pipeline(String),
/// A `serde_json` error, rendered as a serialize error. Also produced by
/// `From<serde_json::Error>` and returned from `source()`.
Serde(serde_json::Error),
/// The named functionality is not yet implemented. The payload is the name
/// shown in the rendered message.
NotImplemented(&'static str),
}
impl core::fmt::Display for DocumentError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Self::TesseractNotFound(hint) => write!(
f,
"gaze-document: tesseract binary not found on PATH. {hint}"
),
Self::TesseractFailed { status, stderr } => write!(
f,
"gaze-document: tesseract exited with status {status}: {stderr}"
),
Self::PdfiumNotFound(hint) => {
write!(f, "gaze-document: pdfium dynamic library not found. {hint}")
}
Self::PdfRasterFailed(detail) => {
write!(f, "gaze-document: pdf rasterization failed: {detail}")
}
Self::UnsupportedInput { path, reason } => write!(
f,
"gaze-document: unsupported input `{}`: {reason}",
path.display()
),
Self::Io(err) => write!(f, "gaze-document: io error: {err}"),
Self::OutputDir(path, err) => write!(
f,
"gaze-document: cannot prepare output dir `{}`: {err}",
path.display()
),
Self::Pipeline(detail) => write!(f, "gaze-document: pipeline error: {detail}"),
Self::Serde(err) => write!(f, "gaze-document: serialize error: {err}"),
Self::NotImplemented(what) => {
write!(f, "gaze-document: {what} is not yet implemented")
}
}
}
}
impl std::error::Error for DocumentError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::Io(err) | Self::OutputDir(_, err) => Some(err),
Self::Serde(err) => Some(err),
_ => None,
}
}
}
impl From<std::io::Error> for DocumentError {
fn from(err: std::io::Error) -> Self {
Self::Io(err)
}
}
impl From<serde_json::Error> for DocumentError {
fn from(err: serde_json::Error) -> Self {
Self::Serde(err)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: building a variant and rendering it yields the expected wording.
    #[test]
    fn crate_compiles_and_error_renders() {
        let rendered = DocumentError::NotImplemented("smoke").to_string();
        assert!(rendered.contains("not yet implemented"));
    }

    // The hint passed to `TesseractNotFound` must appear in the rendered message.
    #[test]
    fn tesseract_not_found_error_includes_hint() {
        let hint = String::from("Install via `brew install tesseract`.");
        let rendered = DocumentError::TesseractNotFound(hint).to_string();
        assert!(rendered.contains("tesseract"));
        assert!(rendered.contains("brew install"));
    }
}