tesseract-ocr-static 0.1.2

Ergonomic, self-contained, statically-built Rust interface for Tesseract OCR.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
use core::ffi::CStr;
use core::marker::PhantomData;
use core::ops::Deref;
use core::ops::DerefMut;
use core::ptr::NonNull;
use std::os::raw::c_void;
use std::time::Duration;

use crate::Element;
use crate::FontAttrs;
use crate::Image;
use crate::InitFailed;
use crate::LayoutIter;
use crate::LayoutLevel;
use crate::OcrEngineMode;
use crate::RecognitionFailed;
use crate::Rectangle;
use crate::Tesseract;
use crate::Text;
use crate::Utf8Text;

const ENGLISH: &CStr = c"eng";

/// OCR configuration.
///
/// Consumed by [`TextRecognizer::with_config`]. `'a` is the lifetime of the borrowed data
/// directory path and `'b` the lifetime of the borrowed language list.
#[derive(Debug)]
pub struct Config<'a, 'b> {
    /// Data directory where language-specific training data is stored.
    ///
    /// If not specified, the `TESSDATA_PREFIX` environment variable is used instead.
    /// If the variable isn't defined, the build-time default is used.
    pub data_dir: Option<&'a CStr>,
    /// Languages are specified by their three-letter ISO codes separated by '+' symbol.
    ///
    /// English (`eng`) is the default.
    pub languages: &'b CStr,
    /// OCR engine mode. LSTM is the default.
    pub ocr_engine_mode: OcrEngineMode,
}

impl Default for Config<'static, 'static> {
    /// Builds the default configuration: no explicit data directory, English as the only
    /// language, and the LSTM-only engine mode.
    fn default() -> Self {
        let data_dir = None;
        let languages = ENGLISH;
        let ocr_engine_mode = OcrEngineMode::LstmOnly;
        Self {
            data_dir,
            languages,
            ocr_engine_mode,
        }
    }
}

/// OCR engine interface.
///
/// Wraps a [`Tesseract`] handle and dereferences to it, so the methods of [`Tesseract`] are
/// available on the recognizer as well.
///
/// ```rust,no_run
/// use tesseract_ocr_static::{Image, TextRecognizer};
/// use image::ImageReader;
///
/// let rgb = ImageReader::open("hello.png").unwrap().decode().unwrap().into_rgb8();
/// let image = Image::from_rgb(rgb.width(), rgb.height(), rgb.as_raw()).unwrap();
/// let mut recognizer = TextRecognizer::new().unwrap();
/// let results = recognizer.recognize_text(&image).unwrap();
/// assert_eq!("Hello world", results.get_utf8_text().as_str());
/// ```
pub struct TextRecognizer {
    // Underlying engine handle; exposed through `Deref`/`DerefMut`.
    base: Tesseract,
}

impl TextRecognizer {
    /// Creates new text recognizer with the default data directory, default language (English),
    /// and default OCR engine mode (LSTM).
    pub fn new() -> Result<Self, InitFailed> {
        Self::with_languages(ENGLISH)
    }

    /// Creates new text recognizer with the specified languages.
    ///
    /// Languages are specified by their three-letter ISO codes separated by '+' symbol.
    pub fn with_languages(languages: &CStr) -> Result<Self, InitFailed> {
        Self::with_config(Config {
            languages,
            ..Default::default()
        })
    }

    /// Creates new text recognizer with the provided configuration.
    ///
    /// # Errors
    ///
    /// Returns [`InitFailed`] when `TessBaseAPIInit2` reports a negative status.
    ///
    /// # Panics
    ///
    /// Panics if `TessBaseAPICreate` returns a null handle.
    pub fn with_config(config: Config<'_, '_>) -> Result<Self, InitFailed> {
        let ptr = unsafe { c::TessBaseAPICreate() };
        let ptr = NonNull::new(ptr).expect("TessBaseAPICreate returned NULL");
        // Hand the raw handle to its owning wrapper *before* initialization so that the early
        // return below cannot leak it.
        // NOTE(review): relies on `Tesseract`'s `Drop` deleting the handle — confirm.
        let base = Tesseract { ptr };
        // A null data path corresponds to `Config::data_dir` being unspecified.
        let data_dir = config.data_dir.map_or(core::ptr::null(), |dir| dir.as_ptr());
        let ret = unsafe {
            c::TessBaseAPIInit2(
                base.ptr.as_ptr(),
                data_dir,
                config.languages.as_ptr(),
                config.ocr_engine_mode as u32,
            )
        };
        if ret < 0 {
            return Err(InitFailed);
        }
        Ok(Self { base })
    }

    /// Runs text recognition over the whole `image` without a progress monitor or timeout.
    ///
    /// The returned [`RecognitionResults`] borrows the recognizer for `'a`.
    ///
    /// # Errors
    ///
    /// Returns [`RecognitionFailed`] if the engine reports a recognition failure.
    pub fn recognize_text<'a>(
        &'a mut self,
        image: &Image,
    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
        self.do_recognize_text(image, None::<Monitor<()>>)
    }

    /// Recognizes text in the provided image and returns an iterator over the results.
    ///
    /// Timeout is the max. time spent for text recognition.
    pub fn recognize_text_with_timeout<'a>(
        &'a mut self,
        image: &Image,
        timeout: Duration,
    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
        let mut monitor = Monitor::<()>::new();
        monitor.set_timeout(timeout);
        self.do_recognize_text(image, Some(monitor))
    }

    /// Runs text recognition over the whole `image` under the supervision of `monitor`.
    ///
    /// The monitor is used to track the progress and set the timeout. It is taken by value.
    ///
    /// # Errors
    ///
    /// Returns [`RecognitionFailed`] if the engine reports a recognition failure.
    pub fn recognize_text_with_monitor<'a, C>(
        &'a mut self,
        image: &Image,
        monitor: Monitor<C>,
    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
        let supervised = Some(monitor);
        self.do_recognize_text(image, supervised)
    }

    /// Shared implementation behind the `recognize_text*` methods.
    ///
    /// Loads `image` into the engine, runs recognition (passing the monitor's native handle,
    /// or null when `monitor` is `None`) and wraps the recognizer in [`RecognitionResults`].
    ///
    /// # Errors
    ///
    /// Returns [`RecognitionFailed`] when `TessBaseAPIRecognize` reports a negative status.
    fn do_recognize_text<'a, C>(
        &'a mut self,
        image: &Image,
        monitor: Option<Monitor<C>>,
    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
        unsafe { c::TessBaseAPISetImage2(self.as_ptr(), image.ptr.as_ptr()) };
        // Borrow the monitor instead of consuming it: `Monitor` deletes its native handle on
        // drop, so it must stay alive until `TessBaseAPIRecognize` has returned. Consuming it
        // here would hand the engine a dangling pointer.
        let monitor_ptr = monitor
            .as_ref()
            .map_or(core::ptr::null_mut(), |m| m.ptr.as_ptr());
        let ret = unsafe { c::TessBaseAPIRecognize(self.as_ptr(), monitor_ptr) };
        // Recognition is over; only now may the monitor (and its cancel callback) be released.
        drop(monitor);
        if ret < 0 {
            return Err(RecognitionFailed);
        }
        Ok(RecognitionResults { inner: self })
    }

    /// Runs text recognition restricted to `rect` within `image`, without a progress monitor
    /// or timeout.
    ///
    /// The returned [`RecognitionResults`] borrows the recognizer for `'a`.
    ///
    /// # Errors
    ///
    /// Returns [`RecognitionFailed`] if the engine reports a recognition failure.
    pub fn recognize_text_in_rect<'a>(
        &'a mut self,
        image: &Image,
        rect: &Rectangle,
    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
        self.do_recognize_text_in_rect(image, rect, None::<Monitor<()>>)
    }

    /// Recognizes text in the specified rectangle of the provided image and
    /// returns an iterator over the results.
    ///
    /// Timeout is the max. time spent for text recognition.
    pub fn recognize_text_in_rect_with_timeout<'a>(
        &'a mut self,
        image: &Image,
        rect: &Rectangle,
        timeout: Duration,
    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
        let mut monitor = Monitor::<()>::new();
        monitor.set_timeout(timeout);
        self.do_recognize_text_in_rect(image, rect, Some(monitor))
    }

    /// Runs text recognition restricted to `rect` within `image` under the supervision of
    /// `monitor`.
    ///
    /// The monitor is used to track the progress and set the timeout. It is taken by value.
    ///
    /// # Errors
    ///
    /// Returns [`RecognitionFailed`] if the engine reports a recognition failure.
    pub fn recognize_text_in_rect_with_monitor<'a, C>(
        &'a mut self,
        image: &Image,
        rect: &Rectangle,
        monitor: Monitor<C>,
    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
        let supervised = Some(monitor);
        self.do_recognize_text_in_rect(image, rect, supervised)
    }

    /// Shared implementation behind the `recognize_text_in_rect*` methods.
    ///
    /// Loads `image` into the engine, restricts recognition to `rect`, runs recognition
    /// (passing the monitor's native handle, or null when `monitor` is `None`) and wraps the
    /// recognizer in [`RecognitionResults`].
    ///
    /// # Errors
    ///
    /// Returns [`RecognitionFailed`] when `TessBaseAPIRecognize` reports a negative status.
    fn do_recognize_text_in_rect<'a, C>(
        &'a mut self,
        image: &Image,
        rect: &Rectangle,
        monitor: Option<Monitor<C>>,
    ) -> Result<RecognitionResults<'a>, RecognitionFailed> {
        unsafe { c::TessBaseAPISetImage2(self.as_ptr(), image.ptr.as_ptr()) };
        unsafe {
            c::TessBaseAPISetRectangle(
                self.as_ptr(),
                rect.left as i32,
                rect.top as i32,
                rect.width as i32,
                rect.height as i32,
            )
        };
        // Borrow the monitor instead of consuming it: `Monitor` deletes its native handle on
        // drop, so it must stay alive until `TessBaseAPIRecognize` has returned. Consuming it
        // here would hand the engine a dangling pointer.
        let monitor_ptr = monitor
            .as_ref()
            .map_or(core::ptr::null_mut(), |m| m.ptr.as_ptr());
        let ret = unsafe { c::TessBaseAPIRecognize(self.as_ptr(), monitor_ptr) };
        // Recognition is over; only now may the monitor (and its cancel callback) be released.
        drop(monitor);
        if ret < 0 {
            return Err(RecognitionFailed);
        }
        Ok(RecognitionResults { inner: self })
    }

    /// Performs layout analysis on `image` and returns an iterator over the detected layout,
    /// rewound to its beginning.
    ///
    /// If you only need layout, consider using [`LayoutAnalyzer`](crate::LayoutAnalyzer) that uses
    /// less memory.
    ///
    /// # Panics
    ///
    /// Panics if `TessBaseAPIAnalyseLayout` returns a null iterator.
    pub fn analyze_layout<'a>(&'a self, image: &Image) -> LayoutIter<'a> {
        let api = self.as_ptr();
        unsafe { c::TessBaseAPISetImage2(api, image.ptr.as_ptr()) };
        let page_iter = NonNull::new(unsafe { c::TessBaseAPIAnalyseLayout(api) })
            .expect("TessBaseAPIAnalyseLayout returned NULL");
        unsafe { c::TessPageIteratorBegin(page_iter.as_ptr()) };
        LayoutIter {
            ptr: page_iter,
            phantom: PhantomData,
        }
    }

    /// Returns the number of Directed Acyclic Word Graphs (DAWGs) in the dictionary.
    pub fn num_dawgs(&self) -> u32 {
        // Query the DAWG count; the page segmentation mode getter returns an unrelated value.
        let ret = unsafe { c::TessBaseAPINumDawgs(self.as_ptr()) };
        ret as u32
    }

    /// Returns the raw Tesseract API handle stored in the wrapped [`Tesseract`] value.
    #[inline]
    fn as_ptr(&self) -> *mut c::TessBaseAPI {
        self.base.ptr.as_ptr()
    }
}

// Lets a `TextRecognizer` be used wherever a shared `Tesseract` reference is expected.
impl Deref for TextRecognizer {
    type Target = Tesseract;

    /// Borrows the wrapped [`Tesseract`] handle.
    fn deref(&self) -> &Self::Target {
        &self.base
    }
}

// Lets a `TextRecognizer` be used wherever a mutable `Tesseract` reference is expected.
impl DerefMut for TextRecognizer {
    /// Mutably borrows the wrapped [`Tesseract`] handle.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.base
    }
}

/// Text recognition results.
///
/// Holds a borrow of the [`TextRecognizer`] that produced the results; every accessor reads
/// from that recognizer's engine handle.
pub struct RecognitionResults<'a> {
    // Recognizer whose engine handle is queried by the accessors.
    inner: &'a TextRecognizer,
}

impl<'a> RecognitionResults<'a> {
    /// Returns the recognized text as a UTF-8 string.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_utf8_text(&self) -> Utf8Text {
        let text = NonNull::new(unsafe { c::TessBaseAPIGetUTF8Text(self.as_ptr()) })
            .expect("TessBaseAPIGetUTF8Text returned NULL");
        Utf8Text(Text { ptr: text })
    }

    /// Returns recognized text as HTML-formatted string with
    /// [hOCR markup](https://en.wikipedia.org/wiki/HOCR).
    ///
    /// `page` is zero-based page index that appears in the output as one-based.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_hocr_text(&self, page: u32) -> Text {
        let text = NonNull::new(unsafe { c::TessBaseAPIGetHOCRText(self.as_ptr(), page as i32) })
            .expect("TessBaseAPIGetHOCRText returned NULL");
        Text { ptr: text }
    }

    /// Returns recognized text as XML-formatted string with
    /// [ALTO markup](https://en.wikipedia.org/wiki/Analyzed_Layout_and_Text_Object).
    ///
    /// `page` is zero-based page index that appears in the output.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_alto_text(&self, page: u32) -> Text {
        let text = NonNull::new(unsafe { c::TessBaseAPIGetAltoText(self.as_ptr(), page as i32) })
            .expect("TessBaseAPIGetAltoText returned NULL");
        Text { ptr: text }
    }

    /// Returns recognized text as XML-formatted string with PAGE markup.
    ///
    /// `page` is zero-based page index that appears in the output as one-based.
    ///
    /// WARNING: This function is currently broken (throws an exception).
    #[doc(hidden)]
    pub fn get_page_text(&self, page: u32) -> Text {
        let text = NonNull::new(unsafe { c::TessBaseAPIGetPAGEText(self.as_ptr(), page as i32) })
            .expect("TessBaseAPIGetPAGEText returned NULL");
        Text { ptr: text }
    }

    /// Returns recognized text as TSV-formatted string.
    ///
    /// `page` is zero-based page index that appears in the output as one-based.
    ///
    /// # TSV columns
    ///
    /// | Column | Comment |
    /// |--------|---------|
    /// | Page number | |
    /// | Block index | |
    /// | Paragraph index | |
    /// | Line index | |
    /// | Word index | |
    /// | Bounding box left | |
    /// | Bounding box top | |
    /// | Bounding box width | |
    /// | Bounding box height | |
    /// | Confidence | `-1` means end of the element |
    /// | Word | |
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_tsv_text(&self, page: u32) -> Text {
        let text = NonNull::new(unsafe { c::TessBaseAPIGetTsvText(self.as_ptr(), page as i32) })
            .expect("TessBaseAPIGetTsvText returned NULL");
        Text { ptr: text }
    }

    /// Returns the
    /// [box file](https://tesseract-ocr.github.io/tessdoc/tess4/Make-Box-Files.html)
    /// for the page.
    ///
    /// `page` is zero-based page index that appears in the output.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_box_text(&self, page: u32) -> Text {
        let text = NonNull::new(unsafe { c::TessBaseAPIGetBoxText(self.as_ptr(), page as i32) })
            .expect("TessBaseAPIGetBoxText returned NULL");
        Text { ptr: text }
    }

    /// Returns the
    /// [LSTM box file](https://tesseract-ocr.github.io/tessdoc/tess4/Make-Box-Files.html)
    /// for the page.
    ///
    /// `page` is zero-based page index that appears in the output.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_lstm_box_text(&self, page: u32) -> Text {
        let text =
            NonNull::new(unsafe { c::TessBaseAPIGetLSTMBoxText(self.as_ptr(), page as i32) })
                .expect("TessBaseAPIGetLSTMBoxText returned NULL");
        Text { ptr: text }
    }

    /// Returns the
    /// [WordStr box file](https://tesseract-ocr.github.io/tessdoc/tess4/Make-Box-Files.html)
    /// for the page.
    ///
    /// `page` is zero-based page index that appears in the output.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_word_str_box_text(&self, page: u32) -> Text {
        let text =
            NonNull::new(unsafe { c::TessBaseAPIGetWordStrBoxText(self.as_ptr(), page as i32) })
                .expect("TessBaseAPIGetWordStrBoxText returned NULL");
        Text { ptr: text }
    }

    /// Returns recognized text as UNLV-formatted string.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_unlv_text(&self) -> Text {
        let text = NonNull::new(unsafe { c::TessBaseAPIGetUNLVText(self.as_ptr()) })
            .expect("TessBaseAPIGetUNLVText returned NULL");
        Text { ptr: text }
    }

    /// Returns an iterator over text elements.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null iterator.
    pub fn iter(&self) -> ResultIter<'a> {
        let result_iter = NonNull::new(unsafe { c::TessBaseAPIGetIterator(self.as_ptr()) })
            .expect("TessBaseAPIGetIterator returned NULL");
        ResultIter {
            ptr: result_iter,
            phantom: PhantomData,
        }
    }

    /// Returns a copy of the thresholded image.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null image.
    pub fn get_thresholded_image(&self) -> Image {
        let pix = NonNull::new(unsafe { c::TessBaseAPIGetThresholdedImage(self.as_ptr()) })
            .expect("TessBaseAPIGetThresholdedImage returned NULL");
        Image { ptr: pix }
    }

    /// Returns thresholded image scale.
    pub fn get_thresholded_image_scale_factor(&self) -> u32 {
        let factor = unsafe { c::TessBaseAPIGetThresholdedImageScaleFactor(self.as_ptr()) };
        factor as u32
    }

    /// Returns average gradient of lines on page.
    pub fn get_gradient(&self) -> f32 {
        let api = self.as_ptr();
        unsafe { c::TessBaseAPIGetGradient(api) }
    }

    /// Returns `true` if the word is valid according to Tesseract's language model.
    #[doc(hidden)]
    pub fn is_valid_word(&self, word: &CStr) -> bool {
        let verdict = unsafe { c::TessBaseAPIIsValidWord(self.as_ptr(), word.as_ptr()) };
        verdict != 0
    }

    /// Returns text direction in Tesseract's coordinates as an `(offset, slope)` pair, or
    /// `None` when the engine call returns zero.
    #[doc(hidden)]
    pub fn get_text_direction(&self) -> Option<(u32, f32)> {
        let (mut offset, mut slope): (i32, f32) = (0, 0.0);
        let found =
            unsafe { c::TessBaseAPIGetTextDirection(self.as_ptr(), &mut offset, &mut slope) };
        if found != 0 {
            Some((offset as u32, slope))
        } else {
            None
        }
    }

    /// Returns the raw engine handle of the recognizer that produced these results.
    #[inline]
    fn as_ptr(&self) -> *mut c::TessBaseAPI {
        self.inner.as_ptr()
    }
}

/// An iterator over recognition results.
///
/// Owns a native `TessResultIterator`, which is deleted when the value is dropped.
pub struct ResultIter<'a> {
    // Native iterator handle.
    ptr: NonNull<c::TessResultIterator>,
    // Ties the iterator's lifetime to a borrow of the engine without storing a reference.
    #[allow(unused)]
    phantom: PhantomData<&'a Tesseract>,
}

impl<'a> ResultIter<'a> {
    /// Advances to the next text element at the specified level and returns it, or returns
    /// `None` if such an element doesn't exist.
    #[must_use]
    pub fn next(&mut self, level: LayoutLevel) -> Option<TextElement<'_>> {
        let advanced = unsafe { c::TessResultIteratorNext(self.ptr.as_ptr(), level as u32) };
        if advanced != 0 {
            Some(TextElement { iter: self })
        } else {
            None
        }
    }

    /// Returns an iterator over layout elements.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null page iterator.
    pub fn as_layout_iter(&self) -> LayoutIter<'a> {
        // NOTE(review): in the Tesseract C API this call appears to return a view of the same
        // underlying iterator rather than a new object — confirm that `LayoutIter`'s
        // destructor cannot free it a second time.
        let page_iter =
            NonNull::new(unsafe { c::TessResultIteratorGetPageIterator(self.ptr.as_ptr()) })
                .expect("TessResultIteratorGetPageIterator returned NULL");
        LayoutIter {
            ptr: page_iter,
            phantom: PhantomData,
        }
    }
}

impl Drop for ResultIter<'_> {
    /// Deletes the native result iterator owned by this value.
    fn drop(&mut self) {
        unsafe { c::TessResultIteratorDelete(self.ptr.as_ptr()) };
    }
}

impl Clone for ResultIter<'_> {
    /// Creates an independent copy of the native iterator via `TessResultIteratorCopy`.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null copy.
    fn clone(&self) -> Self {
        let copy = NonNull::new(unsafe { c::TessResultIteratorCopy(self.ptr.as_ptr()) })
            .expect("TessResultIteratorCopy returned NULL");
        Self {
            ptr: copy,
            phantom: PhantomData,
        }
    }
}

/// Text element.
///
/// A layout element with the recognized text. It borrows the [`ResultIter`] it was obtained
/// from and reads its data through that iterator's native handle.
pub struct TextElement<'a> {
    // Iterator positioned at this element.
    iter: &'a ResultIter<'a>,
}

impl<'a> TextElement<'a> {
    /// Returns the recognized text of the element at the given level as a UTF-8 string.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_utf8_text(&self, level: LayoutLevel) -> Utf8Text {
        let raw_iter = self.iter.ptr.as_ptr();
        let text = NonNull::new(unsafe { c::TessResultIteratorGetUTF8Text(raw_iter, level as u32) })
            .expect("TessResultIteratorGetUTF8Text returned NULL");
        Utf8Text(Text { ptr: text })
    }

    /// Returns the mean confidence of the element at the given level.
    ///
    /// The confidence range is _[0; 100]_.
    pub fn confidence(&self, level: LayoutLevel) -> f32 {
        let raw_iter = self.iter.ptr.as_ptr();
        unsafe { c::TessResultIteratorConfidence(raw_iter, level as u32) }
    }

    /// Returns the language that was used to recognize the word, or `None` if the engine
    /// returns a null string.
    pub fn word_recognition_language(&self) -> Option<&CStr> {
        let lang = unsafe { c::TessResultIteratorWordRecognitionLanguage(self.iter.ptr.as_ptr()) };
        if lang.is_null() {
            None
        } else {
            Some(unsafe { CStr::from_ptr(lang) })
        }
    }

    /// Returns `true` if the current word is a dictionary word.
    pub fn word_is_from_dictionary(&self) -> bool {
        let flag = unsafe { c::TessResultIteratorWordIsFromDictionary(self.iter.ptr.as_ptr()) };
        flag != 0
    }

    /// Returns `true` if the current word is a number.
    pub fn word_is_numeric(&self) -> bool {
        let flag = unsafe { c::TessResultIteratorWordIsNumeric(self.iter.ptr.as_ptr()) };
        flag != 0
    }

    /// Returns font attributes of the current word as well as the font name, or `None` if the
    /// engine returns a null font name.
    pub fn word_font_attributes(&self) -> Option<(FontAttrs, &CStr)> {
        // Out-parameters filled in by the engine.
        let (mut is_bold, mut is_italic, mut is_underlined) = (0, 0, 0);
        let (mut is_monospace, mut is_serif, mut is_smallcaps) = (0, 0, 0);
        let (mut point_size, mut font_id) = (0, 0);
        let font_name = unsafe {
            c::TessResultIteratorWordFontAttributes(
                self.iter.ptr.as_ptr(),
                &mut is_bold,
                &mut is_italic,
                &mut is_underlined,
                &mut is_monospace,
                &mut is_serif,
                &mut is_smallcaps,
                &mut point_size,
                &mut font_id,
            )
        };
        if font_name.is_null() {
            return None;
        }
        let font = unsafe { CStr::from_ptr(font_name) };
        Some((
            FontAttrs {
                is_bold: is_bold != 0,
                is_italic: is_italic != 0,
                is_underlined: is_underlined != 0,
                is_monospace: is_monospace != 0,
                is_serif: is_serif != 0,
                is_smallcaps: is_smallcaps != 0,
                point_size: point_size as u32,
                font_id,
            },
            font,
        ))
    }

    /// Returns `true` if the current symbol is a superscript.
    pub fn symbol_is_superscript(&self) -> bool {
        let flag = unsafe { c::TessResultIteratorSymbolIsSuperscript(self.iter.ptr.as_ptr()) };
        flag != 0
    }

    /// Returns `true` if the current symbol is a subscript.
    pub fn symbol_is_subscript(&self) -> bool {
        let flag = unsafe { c::TessResultIteratorSymbolIsSubscript(self.iter.ptr.as_ptr()) };
        flag != 0
    }

    /// Returns `true` if the current symbol is a dropcap.
    pub fn symbol_is_dropcap(&self) -> bool {
        let flag = unsafe { c::TessResultIteratorSymbolIsDropcap(self.iter.ptr.as_ptr()) };
        flag != 0
    }

    /// Returns an iterator over classifier choices for the current symbol.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null choice iterator.
    pub fn choices(&self) -> ChoiceIterator<'a> {
        let choice_iter =
            NonNull::new(unsafe { c::TessResultIteratorGetChoiceIterator(self.iter.ptr.as_ptr()) })
                .expect("TessResultIteratorGetChoiceIterator returned NULL");
        ChoiceIterator {
            ptr: choice_iter,
            results: self.iter,
        }
    }
}

// Lets a `TextElement` be used as a plain layout `Element`.
impl<'a> Deref for TextElement<'a> {
    type Target = Element<'a>;

    /// Reinterprets this text element as a layout [`Element`].
    fn deref(&self) -> &Self::Target {
        // NOTE(review): this reference transmute presumably relies on `Element<'a>` having the
        // same layout as `TextElement<'a>` (a single reference field) — confirm against the
        // definition of `Element`.
        unsafe { core::mem::transmute(self) }
    }
}

/// A symbol choice.
///
/// Borrows the [`ChoiceIterator`] it was obtained from and reads its data through that
/// iterator's native handle.
pub struct ClassifierChoice<'a> {
    // Iterator positioned at this choice.
    iter: &'a ChoiceIterator<'a>,
}

impl ClassifierChoice<'_> {
    /// Returns the choice as UTF-8 string.
    ///
    /// The bytes returned by the engine are not validated as UTF-8.
    ///
    /// # Panics
    ///
    /// Panics if the engine returns a null string.
    pub fn get_utf8_text(&self) -> &str {
        let raw = unsafe { c::TessChoiceIteratorGetUTF8Text(self.iter.ptr.as_ptr()) };
        assert!(!raw.is_null());
        let bytes = unsafe { CStr::from_ptr(raw) }.to_bytes();
        unsafe { core::str::from_utf8_unchecked(bytes) }
    }

    /// Returns the confidence in the range of _[0; 100]_.
    pub fn confidence(&self) -> f32 {
        let raw_iter = self.iter.ptr.as_ptr();
        unsafe { c::TessChoiceIteratorConfidence(raw_iter) }
    }
}

impl AsRef<str> for ClassifierChoice<'_> {
    /// Returns the same string as [`ClassifierChoice::get_utf8_text`].
    fn as_ref(&self) -> &str {
        self.get_utf8_text()
    }
}

/// An iterator over classifier choices for a symbol.
///
/// Owns a native `TessChoiceIterator`, which is deleted when the value is dropped.
pub struct ChoiceIterator<'a> {
    // Native choice iterator handle.
    ptr: NonNull<c::TessChoiceIterator>,
    // Borrow of the result iterator this choice iterator was created from; never read.
    #[allow(unused)]
    results: &'a ResultIter<'a>,
}

impl ChoiceIterator<'_> {
    /// Advances to the next choice and returns it, or returns `None` when the engine reports
    /// that there is none.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Option<ClassifierChoice<'_>> {
        let advanced = unsafe { c::TessChoiceIteratorNext(self.ptr.as_ptr()) };
        if advanced != 0 {
            Some(ClassifierChoice { iter: self })
        } else {
            None
        }
    }
}

impl Drop for ChoiceIterator<'_> {
    /// Deletes the native choice iterator owned by this value.
    fn drop(&mut self) {
        unsafe { c::TessChoiceIteratorDelete(self.ptr.as_ptr()) };
    }
}

/// C-ABI trampoline that forwards a cancel query to a Rust closure of type `C`.
///
/// Returns whatever the closure returns for the given `words` value.
///
/// # Safety
///
/// `cancel_this` must point to a live, exclusively accessible value of type `C`.
unsafe extern "C" fn cancel_callback<C: FnMut(i32) -> bool>(
    cancel_this: *mut c_void,
    words: i32,
) -> bool {
    // Recover the closure from the opaque user-data pointer.
    let callback = unsafe { &mut *cancel_this.cast::<C>() };
    callback(words)
}

/// Monitor tracks text recognition progress and can be used to set the timeout.
///
/// Owns a native `ETEXT_DESC`, which is deleted when the value is dropped. `C` is the type of
/// the optional cancel callback.
pub struct Monitor<C> {
    // Native monitor handle.
    ptr: NonNull<c::ETEXT_DESC>,
    // Boxed cancel callback, kept so that the pointer registered with the native monitor
    // stays valid; `None` when no callback was installed.
    #[allow(unused)]
    cancel: Option<Box<C>>,
}

impl Monitor<()> {
    /// Creates new monitor without timeout and without cancel callback.
    ///
    /// # Panics
    ///
    /// Panics if `TessMonitorCreate` returns a null handle.
    pub fn new() -> Self {
        let handle =
            NonNull::new(unsafe { c::TessMonitorCreate() }).expect("TessMonitorCreate returned NULL");
        Self {
            ptr: handle,
            cancel: None,
        }
    }
}

impl Default for Monitor<()> {
    /// Equivalent to [`Monitor::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl<C: FnMut(i32) -> bool> Monitor<C> {
    /// Creates new monitor with cancel callback.
    ///
    /// The callback is boxed; its heap address is registered with the native monitor as the
    /// argument passed to the cancel trampoline, and the box is stored in the monitor.
    ///
    /// # Panics
    ///
    /// Panics if `TessMonitorCreate` returns a null handle.
    pub fn with_cancel_callback(cancel: C) -> Self {
        let ptr =
            NonNull::new(unsafe { c::TessMonitorCreate() }).expect("TessMonitorCreate returned NULL");
        unsafe { c::TessMonitorSetCancelFunc(ptr.as_ptr(), Some(cancel_callback::<C>)) };
        let cancel_raw: *mut C = Box::into_raw(Box::new(cancel));
        unsafe { c::TessMonitorSetCancelThis(ptr.as_ptr(), cancel_raw as *mut c_void) };
        // Take ownership back so the callback is freed together with the monitor.
        let owned = unsafe { Box::from_raw(cancel_raw) };
        Self {
            ptr,
            cancel: Some(owned),
        }
    }

    /// Returns cancel callback.
    pub fn get_cancel_callback(&mut self) -> &mut C {
        self.cancel
            .as_deref_mut()
            .expect("Set in the constructor")
    }
}

impl<C> Monitor<C> {
    /// Set progress callback function.
    pub fn set_progress_callback_raw(&mut self, callback: c::TessProgressFunc) {
        let handle = self.ptr.as_ptr();
        unsafe { c::TessMonitorSetProgressFunc(handle, callback) }
    }

    /// Get progress in _[0; 100]_ range.
    pub fn get_progress(&self) -> u32 {
        let progress = unsafe { c::TessMonitorGetProgress(self.ptr.as_ptr()) };
        progress as u32
    }

    /// Set text recognition timeout.
    ///
    /// The timeout is passed to the engine in whole milliseconds; values that do not fit
    /// into `i32` are clamped to `i32::MAX`.
    pub fn set_timeout(&mut self, timeout: Duration) {
        let millis: i32 = timeout.as_millis().try_into().unwrap_or(i32::MAX);
        let handle = self.ptr.as_ptr();
        unsafe { c::TessMonitorSetDeadlineMSecs(handle, millis) };
    }
}

impl<C> Drop for Monitor<C> {
    /// Deletes the native monitor owned by this value.
    fn drop(&mut self) {
        unsafe { c::TessMonitorDelete(self.ptr.as_ptr()) }
    }
}