/// Mutable-access methods for an OCR object that recognizes text using the tesseract-ocr API.
///
/// `as_raw_mut_OCRTesseract` is the only required method; every other method
/// has a provided body.
///
/// Method overview (from the signatures below):
/// - `run_multiple` / `run_multiple_mask`: write the recognized text into
///   `output_text` and fill the per-component rect/text/confidence vectors;
///   the `_mask` variant takes an additional `mask` image.
/// - `run` / `run_mask`: return the recognized text as a `String` and accept a
///   `min_confidence` argument; the `_mask` variant takes an additional `mask`.
/// - `set_white_list`: takes `char_whitelist`
///   (presumably the set of characters used for recognition — confirm).
///
/// `component_level` has a C++ default of 0
/// (presumably OCR_LEVEL_WORD, the documented default level — confirm).
pub trait OCRTesseract: BaseOCR + OCRTesseractConst {
    /// Returns a raw mutable `c_void` pointer for this object.
    // NOTE(review): presumably points at the underlying native OCRTesseract instance — confirm.
    fn as_raw_mut_OCRTesseract(&mut self) -> *mut c_void;

    /// Recognize text using the tesseract-ocr API.
    ///
    /// Takes an image on input and returns the recognized text in `output_text`.
    /// Also outputs the rects of the individual text elements found
    /// (e.g. words), the text of those elements, and their confidence values.
    ///
    /// ## Parameters
    /// * image: Input image CV_8UC1 or CV_8UC3
    /// * output_text: Output text of the tesseract-ocr.
    /// * component_rects: Receives a list of Rects for the individual text
    ///   elements found (e.g. words or text lines).
    /// * component_texts: Receives a list of text strings for the recognition
    ///   of individual text elements found (e.g. words or text lines).
    /// * component_confidences: Receives a list of confidence values for the
    ///   recognition of individual text elements found (e.g. words or text lines).
    /// * component_level: OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
    ///
    /// ## C++ default parameters
    /// * component_rects: NULL
    /// * component_texts: NULL
    /// * component_confidences: NULL
    /// * component_level: 0
    fn run_multiple(
        &mut self,
        image: &mut Mat,
        output_text: &mut String,
        component_rects: &mut Vector<Rect>,
        component_texts: &mut Vector<String>,
        component_confidences: &mut Vector<f32>,
        component_level: i32
    ) -> Result<()> { ... } fn run_multiple_mask(
        &mut self,
        image: &mut Mat,
        mask: &mut Mat,
        output_text: &mut String,
        component_rects: &mut Vector<Rect>,
        component_texts: &mut Vector<String>,
        component_confidences: &mut Vector<f32>,
        component_level: i32
    ) -> Result<()> { ... } fn run(
        &mut self,
        image: &dyn ToInputArray,
        min_confidence: i32,
        component_level: i32
    ) -> Result<String> { ... } fn run_mask(
        &mut self,
        image: &dyn ToInputArray,
        mask: &dyn ToInputArray,
        min_confidence: i32,
        component_level: i32
    ) -> Result<String> { ... } fn set_white_list(&mut self, char_whitelist: &str) -> Result<()> { ... } }

Required Methods

Provided Methods

Recognize text using the tesseract-ocr API.

Takes an image as input and returns the recognized text in the output_text parameter. Optionally, it also provides the Rects for the individual text elements found (e.g. words), and the list of those text elements with their confidence values.

Parameters
  • image: Input image CV_8UC1 or CV_8UC3
  • output_text: Output text of the tesseract-ocr.
  • component_rects: If provided, the method will output a list of Rects for the individual text elements found (e.g. words or text lines).
  • component_texts: If provided, the method will output a list of text strings for the recognition of individual text elements found (e.g. words or text lines).
  • component_confidences: If provided, the method will output a list of confidence values for the recognition of individual text elements found (e.g. words or text lines).
  • component_level: OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
C++ default parameters (run_multiple)
  • component_rects: NULL
  • component_texts: NULL
  • component_confidences: NULL
  • component_level: 0
C++ default parameters (run_multiple_mask)
  • component_rects: NULL
  • component_texts: NULL
  • component_confidences: NULL
  • component_level: 0
C++ default parameters (run)
  • component_level: 0
C++ default parameters (run_mask)
  • component_level: 0

Implementations

Creates an instance of the OCRTesseract class. Initializes Tesseract.

Parameters
  • datapath: the name of the parent directory of tessdata, ending with “/”, or NULL to use the system’s default directory.
  • language: an ISO 639-3 language code; NULL defaults to “eng”.
  • char_whitelist: specifies the list of characters used for recognition. NULL defaults to “0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ”.
  • oem: tesseract-ocr offers different OCR Engine Modes (OEM); by default tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible values.
  • psmode: tesseract-ocr offers different Page Segmentation Modes (PSM); by default tesseract::PSM_AUTO (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other possible values.
C++ default parameters
  • datapath: NULL
  • language: NULL
  • char_whitelist: NULL
  • oem: OEM_DEFAULT
  • psmode: PSM_AUTO

Implementors