Skip to main content

tesseract_ocr_static/
ocr.rs

1use core::ffi::CStr;
2use core::marker::PhantomData;
3use core::ops::Deref;
4use core::ops::DerefMut;
5use core::ptr::NonNull;
6use std::os::raw::c_void;
7use std::time::Duration;
8
9use crate::Element;
10use crate::FontAttrs;
11use crate::Image;
12use crate::InitFailed;
13use crate::LayoutIter;
14use crate::LayoutLevel;
15use crate::OcrEngineMode;
16use crate::RecognitionFailed;
17use crate::Rectangle;
18use crate::Tesseract;
19use crate::Text;
20use crate::Utf8Text;
21
/// Language code used when the caller does not choose a language.
const ENGLISH: &CStr = c"eng";
23
24/// OCR configuration.
25#[derive(Debug)]
26pub struct Config<'a, 'b> {
27    /// Data directory where language-specific training data is stored.
28    ///
29    /// If not specified, the `TESSDATA_PREFIX` environment variable is used instead.
30    /// If the variable isn't defined, the build-time default is used.
31    pub data_dir: Option<&'a CStr>,
32    /// Languages are specified by their three-letter ISO codes separated by '+' symbol.
33    ///
34    /// English is the default.
35    pub languages: &'b CStr,
36    /// OCR engine mode. LSTM is the default.
37    pub ocr_engine_mode: OcrEngineMode,
38}
39
40impl Default for Config<'static, 'static> {
41    fn default() -> Self {
42        Self {
43            data_dir: None,
44            languages: ENGLISH,
45            ocr_engine_mode: OcrEngineMode::LstmOnly,
46        }
47    }
48}
49
50/// OCR engine interface.
51///
52/// ```rust,no_run
53/// use tesseract_ocr_static::{Image, TextRecognizer};
54/// use image::ImageReader;
55///
56/// let rgb = ImageReader::open("hello.txt").unwrap().decode().unwrap().into_rgb8();
57/// let image = Image::from_rgb(rgb.width(), rgb.height(), rgb.as_raw()).unwrap();
58/// let mut recognizer = TextRecognizer::new().unwrap();
59/// let results = recognizer.recognize_text(&image).unwrap();
60/// assert_eq!("Hello world", results.get_utf8_text().as_str());
61/// ```
62pub struct TextRecognizer {
63    base: Tesseract,
64}
65
66impl TextRecognizer {
67    /// Creates new text recognizer with the default data directory, default language (English),
68    /// and default OCR engine mode (LSTM).
69    pub fn new() -> Result<Self, InitFailed> {
70        Self::with_languages(ENGLISH)
71    }
72
73    /// Creates new text recognizer with the specified languages.
74    ///
75    /// Languages are specified by their three-letter ISO codes separated by '+' symbol.
76    pub fn with_languages(languages: &CStr) -> Result<Self, InitFailed> {
77        Self::with_config(Config {
78            languages,
79            ..Default::default()
80        })
81    }
82
83    /// Creates new text recognizer with the provided configuration.
84    pub fn with_config(config: Config<'_, '_>) -> Result<Self, InitFailed> {
85        let ptr = unsafe { c::TessBaseAPICreate() };
86        let ptr = NonNull::new(ptr).expect("TessBaseAPICreate returned NULL");
87        let ret = unsafe {
88            c::TessBaseAPIInit2(
89                ptr.as_ptr(),
90                config
91                    .data_dir
92                    .map(|x| x.as_ptr())
93                    .unwrap_or(core::ptr::null_mut()),
94                config.languages.as_ptr(),
95                config.ocr_engine_mode as u32,
96            )
97        };
98        if ret < 0 {
99            return Err(InitFailed);
100        }
101        let base = Tesseract { ptr };
102        Ok(Self { base })
103    }
104
105    /// Recognizes text in the provided image and returns an iterator over the results.
106    pub fn recognize_text<'a>(
107        &'a mut self,
108        image: &Image,
109    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
110        let monitor: Option<Monitor<()>> = None;
111        self.do_recognize_text(image, monitor)
112    }
113
114    /// Recognizes text in the provided image and returns an iterator over the results.
115    ///
116    /// Timeout is the max. time spent for text recognition.
117    pub fn recognize_text_with_timeout<'a>(
118        &'a mut self,
119        image: &Image,
120        timeout: Duration,
121    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
122        let mut monitor = Monitor::<()>::new();
123        monitor.set_timeout(timeout);
124        self.do_recognize_text(image, Some(monitor))
125    }
126
127    /// Recognizes text in the provided image and returns an iterator over the results.
128    ///
129    /// The monitor is used to track the progress and set the timeout.
130    pub fn recognize_text_with_monitor<'a, C>(
131        &'a mut self,
132        image: &Image,
133        monitor: Monitor<C>,
134    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
135        self.do_recognize_text(image, Some(monitor))
136    }
137
138    fn do_recognize_text<'a, C>(
139        &'a mut self,
140        image: &Image,
141        monitor: Option<Monitor<C>>,
142    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
143        unsafe { c::TessBaseAPISetImage2(self.as_ptr(), image.ptr.as_ptr()) };
144        let monitor_ptr = monitor
145            .map(|m| m.ptr.as_ptr())
146            .unwrap_or(core::ptr::null_mut());
147        let ret = unsafe { c::TessBaseAPIRecognize(self.as_ptr(), monitor_ptr) };
148        if ret < 0 {
149            return Err(RecognitionFailed);
150        }
151        Ok(RecognitionResults { inner: self })
152    }
153
154    /// Recognizes text in the specified rectangle of the provided image and
155    /// returns an iterator over the results.
156    pub fn recognize_text_in_rect<'a>(
157        &'a mut self,
158        image: &Image,
159        rect: &Rectangle,
160    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
161        let monitor: Option<Monitor<()>> = None;
162        self.do_recognize_text_in_rect(image, rect, monitor)
163    }
164
165    /// Recognizes text in the specified rectangle of the provided image and
166    /// returns an iterator over the results.
167    ///
168    /// Timeout is the max. time spent for text recognition.
169    pub fn recognize_text_in_rect_with_timeout<'a>(
170        &'a mut self,
171        image: &Image,
172        rect: &Rectangle,
173        timeout: Duration,
174    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
175        let mut monitor = Monitor::<()>::new();
176        monitor.set_timeout(timeout);
177        self.do_recognize_text_in_rect(image, rect, Some(monitor))
178    }
179
180    /// Recognizes text in the specified rectangle of the provided image and
181    /// returns an iterator over the results.
182    ///
183    /// The monitor is used to track the progress and set the timeout.
184    pub fn recognize_text_in_rect_with_monitor<'a, C>(
185        &'a mut self,
186        image: &Image,
187        rect: &Rectangle,
188        monitor: Monitor<C>,
189    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
190        self.do_recognize_text_in_rect(image, rect, Some(monitor))
191    }
192
193    fn do_recognize_text_in_rect<'a, C>(
194        &'a mut self,
195        image: &Image,
196        rect: &Rectangle,
197        monitor: Option<Monitor<C>>,
198    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
199        unsafe { c::TessBaseAPISetImage2(self.as_ptr(), image.ptr.as_ptr()) };
200        unsafe {
201            c::TessBaseAPISetRectangle(
202                self.as_ptr(),
203                rect.left as i32,
204                rect.top as i32,
205                rect.width as i32,
206                rect.height as i32,
207            )
208        };
209        let monitor_ptr = monitor
210            .map(|m| m.ptr.as_ptr())
211            .unwrap_or(core::ptr::null_mut());
212        let ret = unsafe { c::TessBaseAPIRecognize(self.as_ptr(), monitor_ptr) };
213        if ret < 0 {
214            return Err(RecognitionFailed);
215        }
216        Ok(RecognitionResults { inner: self })
217    }
218
219    /// Analyzes the text layout in the provided image and returns layout analysis results as an
220    /// iterator.
221    ///
222    /// If you only need layout, consider using [`LayoutAnalyzer`](crate::LayoutAnalyzer) that uses
223    /// less memory.
224    pub fn analyze_layout<'a>(&'a self, image: &Image) -> LayoutIter<'a> {
225        unsafe { c::TessBaseAPISetImage2(self.as_ptr(), image.ptr.as_ptr()) };
226        let ptr = unsafe { c::TessBaseAPIAnalyseLayout(self.as_ptr()) };
227        let ptr = NonNull::new(ptr).expect("TessBaseAPIAnalyseLayout returned NULL");
228        unsafe { c::TessPageIteratorBegin(ptr.as_ptr()) };
229        LayoutIter {
230            ptr,
231            phantom: PhantomData,
232        }
233    }
234
235    /// Returns the number of Directed Acyclic Word Graph (DAWG) in the dictionary.
236    pub fn num_dawgs(&self) -> u32 {
237        let ret = unsafe { c::TessBaseAPIGetPageSegMode(self.ptr.as_ptr()) };
238        ret as u32
239    }
240
241    #[inline]
242    fn as_ptr(&self) -> *mut c::TessBaseAPI {
243        self.base.ptr.as_ptr()
244    }
245}
246
247impl Deref for TextRecognizer {
248    type Target = Tesseract;
249
250    fn deref(&self) -> &Self::Target {
251        &self.base
252    }
253}
254
255impl DerefMut for TextRecognizer {
256    fn deref_mut(&mut self) -> &mut Self::Target {
257        &mut self.base
258    }
259}
260
261/// Text recognition results.
262pub struct RecognitionResults<'a> {
263    inner: &'a TextRecognizer,
264}
265
266impl<'a> RecognitionResults<'a> {
267    /// Returns recognized text as string.
268    pub fn get_utf8_text(&self) -> Utf8Text {
269        let ptr = unsafe { c::TessBaseAPIGetUTF8Text(self.as_ptr()) };
270        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetUTF8Text returned NULL");
271        Utf8Text(Text { ptr })
272    }
273
274    /// Returns recognized text as HTML-formatted string with
275    /// [hOCR markup](https://en.wikipedia.org/wiki/HOCR).
276    ///
277    /// `page` is zero-based page index that appears in the output as one-based.
278    pub fn get_hocr_text(&self, page: u32) -> Text {
279        let ptr = unsafe { c::TessBaseAPIGetHOCRText(self.as_ptr(), page as i32) };
280        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetHOCRText returned NULL");
281        Text { ptr }
282    }
283
284    /// Returns recognized text as XML-formatted string with
285    /// [ALTO markup](https://en.wikipedia.org/wiki/Analyzed_Layout_and_Text_Object).
286    ///
287    /// `page` is zero-based page index that appears in the output.
288    pub fn get_alto_text(&self, page: u32) -> Text {
289        let ptr = unsafe { c::TessBaseAPIGetAltoText(self.as_ptr(), page as i32) };
290        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetAltoText returned NULL");
291        Text { ptr }
292    }
293
294    /// Returns recognized text as XML-formatted string with PAGE markup.
295    ///
296    /// `page` is zero-based page index that appears in the output as one-based.
297    ///
298    /// WARNING: This function is currently broken (throws an exception).
299    #[doc(hidden)]
300    pub fn get_page_text(&self, page: u32) -> Text {
301        let ptr = unsafe { c::TessBaseAPIGetPAGEText(self.as_ptr(), page as i32) };
302        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetPAGEText returned NULL");
303        Text { ptr }
304    }
305
306    /// Returns recognized text as TSV-formatted string.
307    ///
308    /// `page` is zero-based page index that appears in the output as one-based.
309    ///
310    /// # TSV columns
311    ///
312    /// | Column | Comment |
313    /// |--------|---------|
314    /// | Page number | |
315    /// | Block index | |
316    /// | Paragraph index | |
317    /// | Line index | |
318    /// | Word index | |
319    /// | Bounding box left | |
320    /// | Bounding box top | |
321    /// | Bounding box width | |
322    /// | Bounding box height | |
323    /// | Confidence | `-1` means end of the element |
324    /// | Word | |
325    pub fn get_tsv_text(&self, page: u32) -> Text {
326        let ptr = unsafe { c::TessBaseAPIGetTsvText(self.as_ptr(), page as i32) };
327        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetTsvText returned NULL");
328        Text { ptr }
329    }
330
331    /// Returns the
332    /// [box file](https://tesseract-ocr.github.io/tessdoc/tess4/Make-Box-Files.html)
333    /// for the page.
334    ///
335    /// `page` is zero-based page index that appears in the output.
336    pub fn get_box_text(&self, page: u32) -> Text {
337        let ptr = unsafe { c::TessBaseAPIGetBoxText(self.as_ptr(), page as i32) };
338        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetBoxText returned NULL");
339        Text { ptr }
340    }
341
342    /// Returns the
343    /// [LSTM box file](https://tesseract-ocr.github.io/tessdoc/tess4/Make-Box-Files.html)
344    /// for the page.
345    ///
346    /// `page` is zero-based page index that appears in the output.
347    pub fn get_lstm_box_text(&self, page: u32) -> Text {
348        let ptr = unsafe { c::TessBaseAPIGetLSTMBoxText(self.as_ptr(), page as i32) };
349        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetLSTMBoxText returned NULL");
350        Text { ptr }
351    }
352
353    /// Returns the
354    /// [WordStr box file](https://tesseract-ocr.github.io/tessdoc/tess4/Make-Box-Files.html)
355    /// for the page.
356    ///
357    /// `page` is zero-based page index that appears in the output.
358    pub fn get_word_str_box_text(&self, page: u32) -> Text {
359        let ptr = unsafe { c::TessBaseAPIGetWordStrBoxText(self.as_ptr(), page as i32) };
360        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetWordStrBoxText returned NULL");
361        Text { ptr }
362    }
363
364    /// Returns recognized text as UNLV-formatted string.
365    pub fn get_unlv_text(&self) -> Text {
366        let ptr = unsafe { c::TessBaseAPIGetUNLVText(self.as_ptr()) };
367        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetUNLVText returned NULL");
368        Text { ptr }
369    }
370
371    /// Returns an iterator over text elements.
372    pub fn iter(&self) -> ResultIter<'a> {
373        let ptr = unsafe { c::TessBaseAPIGetIterator(self.as_ptr()) };
374        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetIterator returned NULL");
375        ResultIter {
376            ptr,
377            phantom: PhantomData,
378        }
379    }
380
381    /// Returns a copy of the thresholded image.
382    pub fn get_thresholded_image(&self) -> Image {
383        let ptr = unsafe { c::TessBaseAPIGetThresholdedImage(self.as_ptr()) };
384        let ptr = NonNull::new(ptr).expect("TessBaseAPIGetThresholdedImage returned NULL");
385        Image { ptr }
386    }
387
388    /// Returns thresholded image scale.
389    pub fn get_thresholded_image_scale_factor(&self) -> u32 {
390        let ret = unsafe { c::TessBaseAPIGetThresholdedImageScaleFactor(self.as_ptr()) };
391        ret as u32
392    }
393
394    /// Returns average gradient of lines on page.
395    pub fn get_gradient(&self) -> f32 {
396        unsafe { c::TessBaseAPIGetGradient(self.as_ptr()) }
397    }
398
399    /// Returns `true` if the word is valid according to Tesseract's language model.
400    #[doc(hidden)]
401    pub fn is_valid_word(&self, word: &CStr) -> bool {
402        let ret = unsafe { c::TessBaseAPIIsValidWord(self.as_ptr(), word.as_ptr()) };
403        ret != 0
404    }
405
406    /// Returns text direction in Tesseract's coordinates.
407    #[doc(hidden)]
408    pub fn get_text_direction(&self) -> Option<(u32, f32)> {
409        let mut offset: i32 = 0;
410        let mut slope: f32 = 0.0;
411        let ret = unsafe { c::TessBaseAPIGetTextDirection(self.as_ptr(), &mut offset, &mut slope) };
412        if ret == 0 {
413            return None;
414        }
415        Some((offset as u32, slope))
416    }
417
418    #[inline]
419    fn as_ptr(&self) -> *mut c::TessBaseAPI {
420        self.inner.as_ptr()
421    }
422}
423
424/// An iterator over recognition results.
425pub struct ResultIter<'a> {
426    ptr: NonNull<c::TessResultIterator>,
427    #[allow(unused)]
428    phantom: PhantomData<&'a Tesseract>,
429}
430
431impl<'a> ResultIter<'a> {
432    /// Returns the next text element at the specified level or
433    /// `None` if such an element doesn't exist.
434    #[must_use]
435    pub fn next(&mut self, level: LayoutLevel) -> Option<TextElement<'_>> {
436        let ret = unsafe { c::TessResultIteratorNext(self.ptr.as_ptr(), level as u32) };
437        (ret != 0).then_some(TextElement { iter: self })
438    }
439
440    /// Returnrs an iterator over layout elements.
441    pub fn as_layout_iter(&self) -> LayoutIter<'a> {
442        let ptr = unsafe { c::TessResultIteratorGetPageIterator(self.ptr.as_ptr()) };
443        let ptr = NonNull::new(ptr).expect("TessResultIteratorGetPageIterator returned NULL");
444        LayoutIter {
445            ptr,
446            phantom: PhantomData,
447        }
448    }
449}
450
451impl Drop for ResultIter<'_> {
452    fn drop(&mut self) {
453        unsafe { c::TessResultIteratorDelete(self.ptr.as_ptr()) };
454    }
455}
456
457impl Clone for ResultIter<'_> {
458    fn clone(&self) -> Self {
459        let ptr = unsafe { c::TessResultIteratorCopy(self.ptr.as_ptr()) };
460        let ptr = NonNull::new(ptr).expect("TessResultIteratorCopy returned NULL");
461        Self {
462            ptr,
463            phantom: PhantomData,
464        }
465    }
466}
467
468/// Text element.
469///
470/// A layout element with the recognized text.
471pub struct TextElement<'a> {
472    iter: &'a ResultIter<'a>,
473}
474
475impl<'a> TextElement<'a> {
476    /// Get recognized text as UTF-8 string.
477    pub fn get_utf8_text(&self, level: LayoutLevel) -> Utf8Text {
478        let ptr = unsafe { c::TessResultIteratorGetUTF8Text(self.iter.ptr.as_ptr(), level as u32) };
479        let ptr = NonNull::new(ptr).expect("TessResultIteratorGetUTF8Text returned NULL");
480        Utf8Text(Text { ptr })
481    }
482
483    /// Returns the mean confidence of the element at the given level.
484    ///
485    /// The confidence range is _[0; 100]_.
486    pub fn confidence(&self, level: LayoutLevel) -> f32 {
487        unsafe { c::TessResultIteratorConfidence(self.iter.ptr.as_ptr(), level as u32) }
488    }
489
490    /// Returns the language that was used to recognize the word.
491    pub fn word_recognition_language(&self) -> Option<&CStr> {
492        let ptr = unsafe { c::TessResultIteratorWordRecognitionLanguage(self.iter.ptr.as_ptr()) };
493        if ptr.is_null() {
494            return None;
495        }
496        Some(unsafe { CStr::from_ptr(ptr) })
497    }
498
499    /// Returns `true` if the current word is a dictionary word.
500    pub fn word_is_from_dictionary(&self) -> bool {
501        let ret = unsafe { c::TessResultIteratorWordIsFromDictionary(self.iter.ptr.as_ptr()) };
502        ret != 0
503    }
504
505    /// Returns true if the current word is a number.
506    pub fn word_is_numeric(&self) -> bool {
507        let ret = unsafe { c::TessResultIteratorWordIsNumeric(self.iter.ptr.as_ptr()) };
508        ret != 0
509    }
510
511    /// Returns font attributes of the current word as well as the font name.
512    pub fn word_font_attributes(&self) -> Option<(FontAttrs, &CStr)> {
513        let mut is_bold = 0;
514        let mut is_italic = 0;
515        let mut is_underlined = 0;
516        let mut is_monospace = 0;
517        let mut is_serif = 0;
518        let mut is_smallcaps = 0;
519        let mut point_size = 0;
520        let mut font_id = 0;
521        let ptr = unsafe {
522            c::TessResultIteratorWordFontAttributes(
523                self.iter.ptr.as_ptr(),
524                &mut is_bold,
525                &mut is_italic,
526                &mut is_underlined,
527                &mut is_monospace,
528                &mut is_serif,
529                &mut is_smallcaps,
530                &mut point_size,
531                &mut font_id,
532            )
533        };
534        if ptr.is_null() {
535            return None;
536        }
537        let font = unsafe { CStr::from_ptr(ptr) };
538        let attrs = FontAttrs {
539            is_bold: is_bold != 0,
540            is_italic: is_italic != 0,
541            is_underlined: is_underlined != 0,
542            is_monospace: is_monospace != 0,
543            is_serif: is_serif != 0,
544            is_smallcaps: is_smallcaps != 0,
545            point_size: point_size as u32,
546            font_id,
547        };
548        Some((attrs, font))
549    }
550
551    /// Returns `true` if the current symbol is a superscript.
552    pub fn symbol_is_superscript(&self) -> bool {
553        let ret = unsafe { c::TessResultIteratorSymbolIsSuperscript(self.iter.ptr.as_ptr()) };
554        ret != 0
555    }
556
557    /// Returns `true` if the current symbol is a subscript.
558    pub fn symbol_is_subscript(&self) -> bool {
559        let ret = unsafe { c::TessResultIteratorSymbolIsSubscript(self.iter.ptr.as_ptr()) };
560        ret != 0
561    }
562
563    /// Returns `true` if the current symbol is a dropcap.
564    pub fn symbol_is_dropcap(&self) -> bool {
565        let ret = unsafe { c::TessResultIteratorSymbolIsDropcap(self.iter.ptr.as_ptr()) };
566        ret != 0
567    }
568
569    /// Returns an iteratove over classifier choices for the current symbol.
570    pub fn choices(&self) -> ChoiceIterator<'a> {
571        let ptr = unsafe { c::TessResultIteratorGetChoiceIterator(self.iter.ptr.as_ptr()) };
572        let ptr = NonNull::new(ptr).expect("TessResultIteratorGetChoiceIterator returned NULL");
573        ChoiceIterator {
574            ptr,
575            results: self.iter,
576        }
577    }
578}
579
580impl<'a> Deref for TextElement<'a> {
581    type Target = Element<'a>;
582
583    fn deref(&self) -> &Self::Target {
584        unsafe { core::mem::transmute(self) }
585    }
586}
587
588/// A symbol choice.
589pub struct ClassifierChoice<'a> {
590    iter: &'a ChoiceIterator<'a>,
591}
592
593impl ClassifierChoice<'_> {
594    /// Returns the choice as UTF-8 string.
595    pub fn get_utf8_text(&self) -> &str {
596        let ptr = unsafe { c::TessChoiceIteratorGetUTF8Text(self.iter.ptr.as_ptr()) };
597        assert!(!ptr.is_null());
598        let c_str = unsafe { CStr::from_ptr(ptr) };
599        unsafe { core::str::from_utf8_unchecked(c_str.to_bytes()) }
600    }
601
602    /// Returns the confidence in the range of _[0; 100]_.
603    pub fn confidence(&self) -> f32 {
604        unsafe { c::TessChoiceIteratorConfidence(self.iter.ptr.as_ptr()) }
605    }
606}
607
608impl AsRef<str> for ClassifierChoice<'_> {
609    fn as_ref(&self) -> &str {
610        self.get_utf8_text()
611    }
612}
613
614/// An iterator over classifier choices for a symbol.
615pub struct ChoiceIterator<'a> {
616    ptr: NonNull<c::TessChoiceIterator>,
617    #[allow(unused)]
618    results: &'a ResultIter<'a>,
619}
620
621impl ChoiceIterator<'_> {
622    /// Returns the next choice.
623    #[allow(clippy::should_implement_trait)]
624    pub fn next(&mut self) -> Option<ClassifierChoice<'_>> {
625        let ret = unsafe { c::TessChoiceIteratorNext(self.ptr.as_ptr()) };
626        (ret != 0).then_some(ClassifierChoice { iter: self })
627    }
628}
629
630impl Drop for ChoiceIterator<'_> {
631    fn drop(&mut self) {
632        unsafe { c::TessChoiceIteratorDelete(self.ptr.as_ptr()) };
633    }
634}
635
/// C-ABI trampoline that forwards a cancellation query to a Rust closure.
///
/// `cancel_this` is cast to `*mut C` and the closure behind it is invoked with `words`;
/// the closure's return value is returned unchanged.
///
/// # Safety
///
/// `cancel_this` must point to a live value of type `C` that is not accessed from
/// anywhere else for the duration of the call.
unsafe extern "C" fn cancel_callback<C: FnMut(i32) -> bool>(
    cancel_this: *mut c_void,
    words: i32,
) -> bool {
    let callback = unsafe { &mut *cancel_this.cast::<C>() };
    callback(words)
}
644
645/// Monitor tracks text recognition progress and can be used to set the timeout.
646pub struct Monitor<C> {
647    ptr: NonNull<c::ETEXT_DESC>,
648    #[allow(unused)]
649    cancel: Option<Box<C>>,
650}
651
652impl Monitor<()> {
653    /// Creates new monitor without timeout and without cancel callback.
654    pub fn new() -> Self {
655        let ptr = unsafe { c::TessMonitorCreate() };
656        let ptr = NonNull::new(ptr).expect("TessMonitorCreate returned NULL");
657        Self { ptr, cancel: None }
658    }
659}
660
661impl Default for Monitor<()> {
662    fn default() -> Self {
663        Self::new()
664    }
665}
666
667impl<C: FnMut(i32) -> bool> Monitor<C> {
668    /// Creates new monitor with cancel callback.
669    pub fn with_cancel_callback(cancel: C) -> Self {
670        let ptr = unsafe { c::TessMonitorCreate() };
671        let ptr = NonNull::new(ptr).expect("TessMonitorCreate returned NULL");
672        unsafe { c::TessMonitorSetCancelFunc(ptr.as_ptr(), Some(cancel_callback::<C>)) };
673        let cancel = Box::new(cancel);
674        let cancel_raw = Box::into_raw(cancel);
675        unsafe { c::TessMonitorSetCancelThis(ptr.as_ptr(), cancel_raw as *mut c_void) };
676        let cancel = Some(unsafe { Box::from_raw(cancel_raw) });
677        Self { ptr, cancel }
678    }
679
680    /// Returns cancel callback.
681    pub fn get_cancel_callback(&mut self) -> &mut C {
682        self.cancel
683            .as_mut()
684            .expect("Set in the constructor")
685            .deref_mut()
686    }
687}
688
689impl<C> Monitor<C> {
690    /// Set progress callback function.
691    pub fn set_progress_callback_raw(&mut self, callback: c::TessProgressFunc) {
692        unsafe { c::TessMonitorSetProgressFunc(self.ptr.as_ptr(), callback) }
693    }
694
695    /// Get progress in _[0; 100]_ range.
696    pub fn get_progress(&self) -> u32 {
697        let ret = unsafe { c::TessMonitorGetProgress(self.ptr.as_ptr()) };
698        ret as u32
699    }
700
701    /// Set text recognition timeout.
702    pub fn set_timeout(&mut self, timeout: Duration) {
703        let millis = timeout.as_millis().try_into().unwrap_or(i32::MAX);
704        unsafe { c::TessMonitorSetDeadlineMSecs(self.ptr.as_ptr(), millis) };
705    }
706}
707
708impl<C> Drop for Monitor<C> {
709    fn drop(&mut self) {
710        unsafe { c::TessMonitorDelete(self.ptr.as_ptr()) }
711    }
712}