use crate::DocumentError;
// Private submodule; only `normalize_ocr_artifacts` is surfaced, and only to the rest of this crate.
mod normalize;
pub(crate) use normalize::normalize_ocr_artifacts;
// The `tesseract` module is compiled only when the `ocr-tesseract` Cargo feature is enabled.
// The `cfg_attr(docsrs, ...)` line asks rustdoc to label the item with that feature requirement
// when the crate is documented with `--cfg docsrs`.
#[cfg(feature = "ocr-tesseract")]
#[cfg_attr(docsrs, doc(cfg(feature = "ocr-tesseract")))]
pub mod tesseract;
// Convenience re-export so `TesseractOcr` is reachable from this module directly
// (same feature gate as the module it comes from).
#[cfg(feature = "ocr-tesseract")]
#[cfg_attr(docsrs, doc(cfg(feature = "ocr-tesseract")))]
pub use tesseract::TesseractOcr;
/// Output of a single OCR pass, bundling the text with a few summary values.
///
/// The type is `#[non_exhaustive]`, so code outside this crate cannot build it
/// with a struct literal and must go through [`OcrResult::new`].
#[non_exhaustive]
#[derive(Debug, Clone)]
pub struct OcrResult {
    /// Text produced by the OCR pass.
    pub text: String,
    /// Mean confidence for the pass, or `None` when no value is available.
    ///
    /// NOTE(review): the scale (e.g. `0.0..=1.0` vs. `0..=100`) is not fixed by this
    /// module — confirm against the backend that fills it in.
    pub mean_confidence: Option<f32>,
    /// Number of words associated with `text`.
    ///
    /// NOTE(review): presumably counted by the backend rather than derived here — confirm.
    pub word_count: usize,
    /// Language identifier for the pass.
    ///
    /// NOTE(review): presumably a backend language code such as `"eng"` — confirm.
    pub lang: String,
}

impl OcrResult {
    /// Builds an [`OcrResult`] from its four components.
    ///
    /// Every argument is stored as given; nothing is validated or normalized.
    pub fn new(
        text: String,
        mean_confidence: Option<f32>,
        word_count: usize,
        lang: String,
    ) -> Self {
        OcrResult { text, mean_confidence, word_count, lang }
    }
}
/// Common interface for OCR backends: turn a byte buffer into text.
pub trait OcrAdapter {
    /// Extracts text from `bytes`.
    ///
    /// NOTE(review): `bytes` is presumably an encoded image or document; which formats are
    /// accepted is up to the implementor — confirm per backend.
    ///
    /// # Errors
    ///
    /// Returns a [`DocumentError`] when the implementor cannot produce text.
    fn extract_text(&self, bytes: &[u8]) -> Result<String, DocumentError>;
}
/// Placeholder [`OcrAdapter`] that stands in until a concrete OCR backend is wired in.
///
/// `Debug` is derived so the type behaves like the other public types in this module
/// and so that a `Result<PendingOcrAdapter, _>` can be used with `Result::unwrap_err` /
/// `Result::expect_err`, both of which require the `Ok` type to implement `Debug`.
#[non_exhaustive]
#[derive(Debug)]
pub struct PendingOcrAdapter {
    // Private unit field: the struct cannot be built with a literal outside this module,
    // so construction has to go through the type's constructor.
    _private: (),
}
impl PendingOcrAdapter {
    /// Attempts to create the placeholder adapter.
    ///
    /// # Errors
    ///
    /// Always returns [`DocumentError::NotImplemented`]; there is no success path yet.
    pub fn new() -> Result<Self, DocumentError> {
        let reason = "PendingOcrAdapter::new (wire a concrete OCR backend)";
        Err(DocumentError::NotImplemented(reason))
    }
}
impl OcrAdapter for PendingOcrAdapter {
    /// Placeholder implementation: the input is ignored.
    ///
    /// # Errors
    ///
    /// Always returns [`DocumentError::NotImplemented`].
    fn extract_text(&self, _bytes: &[u8]) -> Result<String, DocumentError> {
        const REASON: &str = "PendingOcrAdapter::extract_text (wire a concrete OCR backend)";
        Err(DocumentError::NotImplemented(REASON))
    }
}