extern crate tesseract_sys;
extern crate thiserror;
use self::tesseract_sys::{
TessBaseAPICreate, TessBaseAPIDelete, TessBaseAPIGetAltoText, TessBaseAPIGetHOCRText,
TessBaseAPIGetInputImage, TessBaseAPIGetLSTMBoxText, TessBaseAPIGetSourceYResolution,
TessBaseAPIGetTsvText, TessBaseAPIGetUTF8Text, TessBaseAPIGetWordStrBoxText, TessBaseAPIInit3,
TessBaseAPIMeanTextConf, TessBaseAPIRecognize, TessBaseAPISetImage, TessBaseAPISetImage2,
TessBaseAPISetRectangle, TessBaseAPISetSourceResolution, TessBaseAPISetVariable,
};
use self::thiserror::Error;
use crate::Text;
use leptonica_plumbing::Pix;
use std::convert::TryInto;
use std::ffi::CStr;
use std::os::raw::c_int;
use std::ptr;
/// Owning wrapper around a raw `tesseract_sys::TessBaseAPI` handle.
///
/// The wrapped pointer is produced by `TessBaseAPICreate` (see `TessBaseAPI::create`)
/// and is handed to `TessBaseAPIDelete` when the wrapper is dropped.
#[derive(Debug)]
pub struct TessBaseAPI(*mut tesseract_sys::TessBaseAPI);
impl Drop for TessBaseAPI {
    /// Releases the wrapped handle by passing it to `TessBaseAPIDelete`.
    fn drop(&mut self) {
        let handle = self.0;
        // SAFETY: `handle` is the pointer stored by `TessBaseAPI::create`, and `drop`
        // runs at most once per value, so the handle is released exactly once.
        unsafe {
            TessBaseAPIDelete(handle);
        }
    }
}
impl Default for TessBaseAPI {
fn default() -> Self {
Self::create()
}
}
/// Returned by `TessBaseAPI::init_2` when `TessBaseAPIInit3` reports a non-zero status.
#[derive(Debug, Error)]
#[error("TessBaseApi failed to initialize")]
pub struct TessBaseAPIInitError();
/// Returned by `TessBaseAPI::set_variable` when `TessBaseAPISetVariable` returns
/// anything other than `1`.
#[derive(Debug, Error)]
#[error("TessBaseApi failed to set variable")]
pub struct TessBaseAPISetVariableError();
/// Returned by `TessBaseAPI::recognize` when `TessBaseAPIRecognize` reports a
/// non-zero status.
#[derive(Debug, Error)]
#[error("TessBaseApi failed to recognize")]
pub struct TessBaseAPIRecogniseError();
/// Returned by `TessBaseAPI::get_hocr_text` when `TessBaseAPIGetHOCRText` yields a
/// null pointer.
#[derive(Debug, Error)]
#[error("TessBaseApi get_hocr_text returned null")]
pub struct TessBaseAPIGetHOCRTextError();
/// Returned by `TessBaseAPI::get_utf8_text` when `TessBaseAPIGetUTF8Text` yields a
/// null pointer.
#[derive(Debug, Error)]
#[error("TessBaseApi get_utf8_text returned null")]
pub struct TessBaseAPIGetUTF8TextError();
/// Reasons `TessBaseAPI::set_image` refuses to pass a raw pixel buffer to Tesseract.
///
/// Each variant corresponds to one of the sanity checks performed on the caller's
/// description of the buffer before the FFI call is made.
#[derive(Debug, Error, PartialEq)]
pub enum TessBaseAPISetImageSafetyError {
/// The claimed dimensions could not be expressed as a `usize` byte count.
#[error("Image dimensions exceed computer memory")]
DimensionsExceedMemory(),
/// The claimed byte count (`height * bytes_per_line`) is larger than the supplied
/// buffer.
#[error("Image dimensions exceed image size")]
DimensionsExceedImageSize(),
/// A row of `width` pixels does not fit in `bytes_per_line` bytes.
#[error("Image width exceeds bytes per line")]
ImageWidthExceedsBytesPerLine(),
}
/// Returned by `TessBaseAPI::get_alto_text` when `TessBaseAPIGetAltoText` yields a
/// null pointer.
#[derive(Debug, Error)]
#[error("TessBaseApi get_alto_text returned null")]
pub struct TessBaseAPIGetAltoTextError();
/// Returned by `TessBaseAPI::get_tsv_text` when `TessBaseAPIGetTsvText` yields a
/// null pointer.
#[derive(Debug, Error)]
#[error("TessBaseApi get_tsv_text returned null")]
pub struct TessBaseAPIGetTsvTextError();
/// Returned by `TessBaseAPI::get_lstm_box_text` when `TessBaseAPIGetLSTMBoxText`
/// yields a null pointer.
#[derive(Debug, Error)]
#[error("TessBaseApi get_lstm_box_text returned null")]
pub struct TessBaseAPIGetLSTMBoxTextError();
#[derive(Debug, Error)]
#[error("TessBaseApi get_word_str_text returned null")]
pub struct TessBaseAPIGetWordStrBoxTextError();
impl TessBaseAPI {
    /// Wraps a fresh handle obtained from `TessBaseAPICreate`.
    ///
    /// This only creates the handle; `TessBaseAPIInit3` is invoked separately through
    /// `init_2`.
    pub fn create() -> TessBaseAPI {
        // SAFETY: the call takes no arguments, so there are no pointer preconditions.
        TessBaseAPI(unsafe { TessBaseAPICreate() })
    }

    /// Initialises the handle via `TessBaseAPIInit3`.
    ///
    /// `datapath` and `language` are forwarded as C strings; `None` is forwarded as a
    /// null pointer.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPIInitError` when the underlying call returns a non-zero
    /// status.
    pub fn init_2(
        &mut self,
        datapath: Option<&CStr>,
        language: Option<&CStr>,
    ) -> Result<(), TessBaseAPIInitError> {
        // SAFETY: `self.0` is the handle stored by `create`; each string pointer is
        // either null or borrowed from a `CStr` that outlives the call.
        let ret = unsafe {
            TessBaseAPIInit3(
                self.0,
                datapath.map(CStr::as_ptr).unwrap_or_else(ptr::null),
                language.map(CStr::as_ptr).unwrap_or_else(ptr::null),
            )
        };
        if ret == 0 {
            Ok(())
        } else {
            Err(TessBaseAPIInitError {})
        }
    }

    /// Passes the raw pointer behind `pix` to `TessBaseAPISetImage2`.
    // NOTE(review): whether Tesseract keeps using `pix` after this call returns is not
    // visible from this file — confirm against the Tesseract documentation.
    pub fn set_image_2(&mut self, pix: &Pix) {
        // SAFETY: `self.0` is the handle stored by `create`; the pix pointer comes
        // from a live `Pix` borrowed for the duration of the call.
        unsafe {
            TessBaseAPISetImage2(self.0, *pix.as_ref());
        }
    }

    /// Validates that the image layout described by the arguments fits inside a
    /// buffer of `image_len` bytes.
    ///
    /// All arithmetic is done on `usize` with overflow checks, so neither negative
    /// inputs nor overflowing products can slip past the bounds checks.
    fn check_image_layout(
        image_len: usize,
        width: c_int,
        height: c_int,
        bytes_per_pixel: c_int,
        bytes_per_line: c_int,
    ) -> Result<(), TessBaseAPISetImageSafetyError> {
        // A negative value cannot describe a buffer layout; report it the same way as
        // a size that does not fit in memory.
        let as_size = |value: c_int| -> Result<usize, TessBaseAPISetImageSafetyError> {
            value
                .try_into()
                .map_err(|_| TessBaseAPISetImageSafetyError::DimensionsExceedMemory())
        };

        let height = as_size(height)?;
        let bytes_per_line = as_size(bytes_per_line)?;
        let claimed_image_size = height
            .checked_mul(bytes_per_line)
            .ok_or(TessBaseAPISetImageSafetyError::DimensionsExceedMemory())?;
        if claimed_image_size > image_len {
            return Err(TessBaseAPISetImageSafetyError::DimensionsExceedImageSize());
        }

        let width = as_size(width)?;
        let bytes_per_pixel = as_size(bytes_per_pixel)?;
        let row_fits = match bytes_per_pixel {
            // Zero bytes per pixel is treated as one bit per pixel: eight pixels per
            // byte. If the bit capacity overflows `usize`, any `width` fits.
            0 => bytes_per_line
                .checked_mul(8)
                .map_or(true, |bits| width <= bits),
            // If the row size overflows `usize`, it cannot fit in `bytes_per_line`.
            bpp => width
                .checked_mul(bpp)
                .map_or(false, |needed| needed <= bytes_per_line),
        };
        if row_fits {
            Ok(())
        } else {
            Err(TessBaseAPISetImageSafetyError::ImageWidthExceedsBytesPerLine())
        }
    }

    /// Hands a raw pixel buffer to `TessBaseAPISetImage` after checking that the
    /// described layout stays within `image_data`.
    ///
    /// A `bytes_per_pixel` of `0` is treated as one bit per pixel.
    ///
    /// # Errors
    ///
    /// * `DimensionsExceedMemory` — a dimension is negative, or
    ///   `height * bytes_per_line` does not fit in `usize`.
    /// * `DimensionsExceedImageSize` — `height * bytes_per_line` is larger than
    ///   `image_data.len()`.
    /// * `ImageWidthExceedsBytesPerLine` — a row of `width` pixels needs more than
    ///   `bytes_per_line` bytes.
    pub fn set_image(
        &mut self,
        image_data: &[u8],
        width: c_int,
        height: c_int,
        bytes_per_pixel: c_int,
        bytes_per_line: c_int,
    ) -> Result<(), TessBaseAPISetImageSafetyError> {
        Self::check_image_layout(
            image_data.len(),
            width,
            height,
            bytes_per_pixel,
            bytes_per_line,
        )?;
        // SAFETY: `self.0` is the handle stored by `create`. The layout check above
        // guarantees all dimensions are non-negative, that `height * bytes_per_line`
        // bytes are readable from `image_data`, and that each row of `width` pixels
        // fits in `bytes_per_line` bytes.
        unsafe {
            TessBaseAPISetImage(
                self.0,
                image_data.as_ptr(),
                width,
                height,
                bytes_per_pixel,
                bytes_per_line,
            );
        };
        Ok(())
    }

    /// Forwards `ppi` to `TessBaseAPISetSourceResolution`.
    pub fn set_source_resolution(&mut self, ppi: c_int) {
        // SAFETY: `self.0` is the handle stored by `create`.
        unsafe {
            TessBaseAPISetSourceResolution(self.0, ppi);
        }
    }

    /// Sets a Tesseract variable through `TessBaseAPISetVariable`.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPISetVariableError` unless the underlying call returns `1`.
    pub fn set_variable(
        &mut self,
        name: &CStr,
        value: &CStr,
    ) -> Result<(), TessBaseAPISetVariableError> {
        // SAFETY: `self.0` is the handle stored by `create`; both pointers are
        // borrowed from `CStr`s that outlive the call.
        let ret = unsafe { TessBaseAPISetVariable(self.0, name.as_ptr(), value.as_ptr()) };
        match ret {
            1 => Ok(()),
            _ => Err(TessBaseAPISetVariableError {}),
        }
    }

    /// Runs `TessBaseAPIRecognize` with a null second argument.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPIRecogniseError` when the underlying call returns a
    /// non-zero status.
    pub fn recognize(&mut self) -> Result<(), TessBaseAPIRecogniseError> {
        // SAFETY: `self.0` is the handle stored by `create`.
        let ret = unsafe { TessBaseAPIRecognize(self.0, ptr::null_mut()) };
        match ret {
            0 => Ok(()),
            _ => Err(TessBaseAPIRecogniseError {}),
        }
    }

    /// Returns the result of `TessBaseAPIGetUTF8Text` wrapped in a `Text`.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPIGetUTF8TextError` when the underlying call yields null.
    pub fn get_utf8_text(&mut self) -> Result<Text, TessBaseAPIGetUTF8TextError> {
        // SAFETY: `self.0` is the handle stored by `create`.
        let ptr = unsafe { TessBaseAPIGetUTF8Text(self.0) };
        if ptr.is_null() {
            Err(TessBaseAPIGetUTF8TextError {})
        } else {
            // SAFETY: `ptr` is non-null and was just returned by Tesseract.
            // NOTE(review): `Text::new`'s full contract is defined elsewhere — confirm.
            Ok(unsafe { Text::new(ptr) })
        }
    }

    /// Returns the result of `TessBaseAPIGetHOCRText` for `page` wrapped in a `Text`.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPIGetHOCRTextError` when the underlying call yields null.
    pub fn get_hocr_text(&mut self, page: c_int) -> Result<Text, TessBaseAPIGetHOCRTextError> {
        // SAFETY: `self.0` is the handle stored by `create`.
        let ptr = unsafe { TessBaseAPIGetHOCRText(self.0, page) };
        if ptr.is_null() {
            Err(TessBaseAPIGetHOCRTextError {})
        } else {
            // SAFETY: `ptr` is non-null and was just returned by Tesseract.
            Ok(unsafe { Text::new(ptr) })
        }
    }

    /// Returns the pointer reported by `TessBaseAPIGetInputImage` as a `BorrowedPix`,
    /// or `None` when that pointer is null.
    pub fn get_input_image(&self) -> Option<leptonica_plumbing::BorrowedPix> {
        // SAFETY: `self.0` is the handle stored by `create`.
        let ptr = unsafe { TessBaseAPIGetInputImage(self.0) };
        if ptr.is_null() {
            None
        } else {
            // SAFETY: `ptr` is non-null and was just returned by Tesseract.
            // NOTE(review): `BorrowedPix::new`'s lifetime contract is defined in
            // `leptonica_plumbing` — confirm it is satisfied here.
            Some(unsafe { leptonica_plumbing::BorrowedPix::new(ptr) })
        }
    }

    /// Returns the value reported by `TessBaseAPIGetSourceYResolution`.
    pub fn get_source_y_resolution(&self) -> c_int {
        // SAFETY: `self.0` is the handle stored by `create`.
        unsafe { TessBaseAPIGetSourceYResolution(self.0) }
    }

    /// Forwards the rectangle to `TessBaseAPISetRectangle`.
    pub fn set_rectangle(&mut self, left: c_int, top: c_int, width: c_int, height: c_int) {
        // SAFETY: `self.0` is the handle stored by `create`.
        unsafe { TessBaseAPISetRectangle(self.0, left, top, width, height) }
    }

    /// Returns the result of `TessBaseAPIGetAltoText` for `page_number` wrapped in a
    /// `Text`.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPIGetAltoTextError` when the underlying call yields null.
    pub fn get_alto_text(
        &mut self,
        page_number: c_int,
    ) -> Result<Text, TessBaseAPIGetAltoTextError> {
        // SAFETY: `self.0` is the handle stored by `create`.
        let ptr = unsafe { TessBaseAPIGetAltoText(self.0, page_number) };
        if ptr.is_null() {
            Err(TessBaseAPIGetAltoTextError {})
        } else {
            // SAFETY: `ptr` is non-null and was just returned by Tesseract.
            Ok(unsafe { Text::new(ptr) })
        }
    }

    /// Returns the result of `TessBaseAPIGetTsvText` for `page_number` wrapped in a
    /// `Text`.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPIGetTsvTextError` when the underlying call yields null.
    pub fn get_tsv_text(&mut self, page_number: c_int) -> Result<Text, TessBaseAPIGetTsvTextError> {
        // SAFETY: `self.0` is the handle stored by `create`.
        let ptr = unsafe { TessBaseAPIGetTsvText(self.0, page_number) };
        if ptr.is_null() {
            Err(TessBaseAPIGetTsvTextError {})
        } else {
            // SAFETY: `ptr` is non-null and was just returned by Tesseract.
            Ok(unsafe { Text::new(ptr) })
        }
    }

    /// Returns the result of `TessBaseAPIGetLSTMBoxText` for `page_number` wrapped in
    /// a `Text`.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPIGetLSTMBoxTextError` when the underlying call yields null.
    pub fn get_lstm_box_text(
        &mut self,
        page_number: c_int,
    ) -> Result<Text, TessBaseAPIGetLSTMBoxTextError> {
        // SAFETY: `self.0` is the handle stored by `create`.
        let ptr = unsafe { TessBaseAPIGetLSTMBoxText(self.0, page_number) };
        if ptr.is_null() {
            Err(TessBaseAPIGetLSTMBoxTextError {})
        } else {
            // SAFETY: `ptr` is non-null and was just returned by Tesseract.
            Ok(unsafe { Text::new(ptr) })
        }
    }

    /// Returns the result of `TessBaseAPIGetWordStrBoxText` for `page_number` wrapped
    /// in a `Text`.
    ///
    /// # Errors
    ///
    /// Returns `TessBaseAPIGetWordStrBoxTextError` when the underlying call yields
    /// null.
    pub fn get_word_str_box_text(
        &mut self,
        page_number: c_int,
    ) -> Result<Text, TessBaseAPIGetWordStrBoxTextError> {
        // SAFETY: `self.0` is the handle stored by `create`.
        let ptr = unsafe { TessBaseAPIGetWordStrBoxText(self.0, page_number) };
        if ptr.is_null() {
            Err(TessBaseAPIGetWordStrBoxTextError {})
        } else {
            // SAFETY: `ptr` is non-null and was just returned by Tesseract.
            Ok(unsafe { Text::new(ptr) })
        }
    }

    /// Returns the value reported by `TessBaseAPIMeanTextConf`.
    pub fn mean_text_conf(&self) -> c_int {
        // SAFETY: `self.0` is the handle stored by `create`.
        unsafe { TessBaseAPIMeanTextConf(self.0) }
    }
}
#[test]
fn set_image_1_safety_test() {
    use image::GenericImageView;

    let mut api = TessBaseAPI::create();
    api.init_2(None, None).unwrap();

    // A real RGBA image described by its true dimensions must be accepted.
    let picture = image::open("image.png").unwrap();
    let rgba = picture.as_rgba8().unwrap();
    let pixel_width: c_int = picture.width().try_into().unwrap();
    let pixel_height: c_int = picture.height().try_into().unwrap();
    let stride: c_int = (picture.width() * 4).try_into().unwrap();
    assert_eq!(
        api.set_image(rgba, pixel_width, pixel_height, 4, stride),
        Ok(())
    );

    // Synthetic buffers, each row being:
    // (buffer, width, height, bytes_per_pixel, bytes_per_line, expected result).
    let four_bytes: &[u8] = &[0, 0, 0, 0];
    let three_bytes: &[u8] = &[0, 0, 0];
    let scenarios = [
        (four_bytes, 2, 2, 1, 2, Ok(())),
        (
            three_bytes,
            2,
            2,
            1,
            2,
            Err(TessBaseAPISetImageSafetyError::DimensionsExceedImageSize()),
        ),
        (
            four_bytes,
            2,
            2,
            1,
            1,
            Err(TessBaseAPISetImageSafetyError::ImageWidthExceedsBytesPerLine()),
        ),
        (four_bytes, 16, 2, 0, 2, Ok(())),
        (
            four_bytes,
            17,
            2,
            0,
            2,
            Err(TessBaseAPISetImageSafetyError::ImageWidthExceedsBytesPerLine()),
        ),
    ];
    for (buffer, width, height, bytes_per_pixel, bytes_per_line, expected) in scenarios.iter() {
        assert_eq!(
            api.set_image(*buffer, *width, *height, *bytes_per_pixel, *bytes_per_line),
            *expected
        );
    }
}
#[test]
fn set_variable_error_test() -> Result<(), Box<dyn std::error::Error>> {
    // "fail" is used as both the variable name and its value; the call is expected
    // to be rejected.
    let bogus = std::ffi::CString::new("fail")?;

    let mut api = TessBaseAPI::create();
    api.init_2(None, None)?;

    let outcome = api.set_variable(&bogus, &bogus);
    assert!(outcome.is_err());
    Ok(())
}