tesseract_ocr_static/
types.rs1use core::ffi::CStr;
2use core::ptr::NonNull;
3use std::os::raw::c_char;
4
/// Generates a public, field-less enum whose variants mirror integer constants
/// found in the `c` module.
///
/// Invocation grammar:
///
/// ```text
/// define_enum! {
///     EnumName "enum documentation"
///     (Variant C_CONSTANT "variant documentation" ["deprecation note"])
///     ...
/// }
/// ```
///
/// Each variant's discriminant is `c::C_CONSTANT as isize`. An optional
/// trailing literal is emitted as `#[deprecated = "..."]` on that variant.
///
/// A crate-private `from_raw` constructor is generated alongside the enum; it
/// maps a raw `u32` back to the matching variant and panics on anything else.
macro_rules! define_enum {
    ($enum: ident
     $doc: literal
     $(($variant: ident $value: ident $variant_doc: literal $($deprecated: literal)*))+) => {
        #[doc = $doc]
        #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub enum $enum {
            $(
                #[doc = $variant_doc]
                $(#[deprecated = $deprecated])*
                $variant = c::$value as isize,
            )+
        }

        impl $enum {
            /// Converts a raw value into the corresponding variant.
            ///
            /// # Panics
            ///
            /// Panics when `value` matches none of the listed constants. The
            /// message names the enum and includes the rejected value.
            // Not every generated enum is converted back from a raw value.
            #[allow(unused)]
            // Deprecated variants must still be constructible from their raw value.
            #[allow(deprecated)]
            pub(crate) fn from_raw(value: u32) -> Self {
                match value {
                    $(c::$value => Self::$variant,)+
                    // Keep the historical message as a prefix and append the
                    // value so the failure can be diagnosed from the panic alone.
                    _ => panic!(
                        concat!("Invalid ", stringify!($enum), " value: {}"),
                        value
                    ),
                }
            }
        }
    };
}
31
32define_enum! {
33 LayoutLevel "Element of layout hierarchy"
34 (Block TessPageIteratorLevel_RIL_BLOCK "Block of text/image/separator line.")
35 (Paragraph TessPageIteratorLevel_RIL_PARA "Paragraph within a block.")
36 (TextLine TessPageIteratorLevel_RIL_TEXTLINE "Line within a paragraph.")
37 (Word TessPageIteratorLevel_RIL_WORD "Word within a textline.")
38 (Symbol TessPageIteratorLevel_RIL_SYMBOL "Symbol/character within a word.")
39}
40
41define_enum! {
42 OcrEngineMode "OCR engine mode"
43 (TesseractOnly TessOcrEngineMode_OEM_TESSERACT_ONLY "Run Tesseract only." "Use LstmOnly instead.")
44 (LstmOnly TessOcrEngineMode_OEM_LSTM_ONLY "Run just the LSTM line recognizer.")
45 (TesseractLstmCombined TessOcrEngineMode_OEM_TESSERACT_LSTM_COMBINED
46 "Run LSTM, fall back to Tesseract when things get difficult." "Use LstmOnly instead.")
47 (Default TessOcrEngineMode_OEM_DEFAULT "Choose mode automatically based on the configuration.")
48}
49
50define_enum! {
51 PageSegmentationMode "Page layout analysis modes."
52 (OsdOnly TessPageSegMode_PSM_OSD_ONLY "Orientation and script detection only.")
53 (AutoOsd TessPageSegMode_PSM_AUTO_OSD "Automatic page segmentation with orientation and script detection (OSD).")
54 (AutoOnly TessPageSegMode_PSM_AUTO_ONLY "Automatic page segmentation, but no OSD, or OCR.")
55 (Auto TessPageSegMode_PSM_AUTO "Fully automatic page segmentation, but no OSD.")
56 (SingleColumn TessPageSegMode_PSM_SINGLE_COLUMN "Assume a single column of text of variable sizes.")
57 (SingleBlockVertText TessPageSegMode_PSM_SINGLE_BLOCK_VERT_TEXT "Assume a single uniform block of vertically aligned text.")
58 (SingleBlock TessPageSegMode_PSM_SINGLE_BLOCK "Assume a single uniform block of text (default).")
59 (SingleLine TessPageSegMode_PSM_SINGLE_LINE "Treat the image as a single text line.")
60 (SingleWord TessPageSegMode_PSM_SINGLE_WORD "Treat the image as a single word.")
61 (CircleWord TessPageSegMode_PSM_CIRCLE_WORD "Treat the image as a single word in a circle.")
62 (SingleChar TessPageSegMode_PSM_SINGLE_CHAR "Treat the image as a single character.")
63 (SparseText TessPageSegMode_PSM_SPARSE_TEXT "Find as much text as possible in no particular order.")
64 (SparseTextOsd TessPageSegMode_PSM_SPARSE_TEXT_OSD "Sparse text with orientation and script det.")
65 (RawLine TessPageSegMode_PSM_RAW_LINE "Treat the image as a single text line, bypassing hacks that are Tesseract-specific.")
66}
67
68#[allow(clippy::derivable_impls)]
69impl Default for PageSegmentationMode {
70 fn default() -> Self {
71 Self::SingleBlock
72 }
73}
74
75define_enum! {
76 BlockType "Block type."
77 (Unknown TessPolyBlockType_PT_UNKNOWN "Type is not yet known.")
78 (FlowingText TessPolyBlockType_PT_FLOWING_TEXT "Text that lives inside a column.")
79 (HeadingText TessPolyBlockType_PT_HEADING_TEXT "Text that spans more than one column.")
80 (PulloutText TessPolyBlockType_PT_PULLOUT_TEXT "Text that is in a cross-column pull-out region.")
81 (Equation TessPolyBlockType_PT_EQUATION "Partition belonging to an equation region.")
82 (InlineEquation TessPolyBlockType_PT_INLINE_EQUATION "Partition has inline equation.")
83 (Table TessPolyBlockType_PT_TABLE "Partition belonging to a table region.")
84 (VerticalText TessPolyBlockType_PT_VERTICAL_TEXT "Text-line runs vertically.")
85 (CaptionText TessPolyBlockType_PT_CAPTION_TEXT "Text that belongs to an image.")
86 (FlowingImage TessPolyBlockType_PT_FLOWING_IMAGE "Image that lives inside a column.")
87 (HeadingImage TessPolyBlockType_PT_HEADING_IMAGE "Image that spans more than one column.")
88 (PulloutImage TessPolyBlockType_PT_PULLOUT_IMAGE "Image that is in a cross-column pull-out region.")
89 (HorzLine TessPolyBlockType_PT_HORZ_LINE "Horizontal line.")
90 (VertLine TessPolyBlockType_PT_VERT_LINE "Vertical line.")
91 (Noise TessPolyBlockType_PT_NOISE "Lies outside of any column.")
92}
93
94define_enum! {
95 Orientation "Text position on the page."
96 (Up TessOrientation_ORIENTATION_PAGE_UP "Up")
97 (Right TessOrientation_ORIENTATION_PAGE_RIGHT "Right")
98 (Down TessOrientation_ORIENTATION_PAGE_DOWN "Down")
99 (Left TessOrientation_ORIENTATION_PAGE_LEFT "Left")
100}
101
102define_enum! {
103 WritingDirection "Text writing direction."
104 (LeftToRight TessWritingDirection_WRITING_DIRECTION_LEFT_TO_RIGHT "Left-to-right")
105 (RightToLeft TessWritingDirection_WRITING_DIRECTION_RIGHT_TO_LEFT "Right-to-left")
106 (TopToBottom TessWritingDirection_WRITING_DIRECTION_TOP_TO_BOTTOM "Top-to-bottom")
107}
108
109define_enum! {
110 TextlineOrder "The order of the lines of text."
111 (LeftToRight TessTextlineOrder_TEXTLINE_ORDER_LEFT_TO_RIGHT "Left-to-right")
112 (RightToLeft TessTextlineOrder_TEXTLINE_ORDER_RIGHT_TO_LEFT "Right-to-left")
113 (TopToBottom TessTextlineOrder_TEXTLINE_ORDER_TOP_TO_BOTTOM "Top-to-bottom")
114}
115
116define_enum! {
117 ParagraphJustification "Paragraph alignment."
118 (Unknown TessParagraphJustification_JUSTIFICATION_UNKNOWN "The alignment is unclear.")
119 (Left TessParagraphJustification_JUSTIFICATION_LEFT
120 "Each line, except possibly the first, is flush to the same left tab stop.")
121 (Center TessParagraphJustification_JUSTIFICATION_CENTER
122 "The text lines of the paragraph are centered about a line going down through their middle of the text lines.")
123 (Right TessParagraphJustification_JUSTIFICATION_RIGHT
124 "Each line, except possibly the first, is flush to the same right tab stop.")
125}
126
/// Rectangle described by the position of its left/top edges and its size.
///
/// NOTE(review): units and coordinate origin are not visible in this file —
/// presumably image pixels; confirm against the code that builds the value.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Rectangle {
    /// Position of the left edge.
    pub left: u32,
    /// Position of the top edge.
    pub top: u32,
    /// Horizontal extent.
    pub width: u32,
    /// Vertical extent.
    pub height: u32,
}
137
/// Pair of points, `(x1, y1)` and `(x2, y2)`.
///
/// NOTE(review): presumably the two end points of a line segment in image
/// coordinates — confirm against the code that builds the value.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Line {
    /// X coordinate of the first point.
    pub x1: u32,
    /// Y coordinate of the first point.
    pub y1: u32,
    /// X coordinate of the second point.
    pub x2: u32,
    /// Y coordinate of the second point.
    pub y2: u32,
}
148
149#[derive(Debug, Clone)]
151pub struct OrientationParams {
152 pub orientation: Orientation,
154 pub writing_direction: WritingDirection,
156 pub textline_order: TextlineOrder,
158 pub deskew_angle: f32,
162}
163
164#[derive(Debug, Clone)]
166pub struct ParagraphInfo {
167 pub justification: ParagraphJustification,
169 pub is_list_item: bool,
171 pub is_crown: bool,
177 pub first_line_indent: i32,
181}
182
/// Font attributes: style flags, point size and a font identifier.
#[derive(Clone, Debug)]
pub struct FontAttrs {
    /// Bold flag.
    pub is_bold: bool,
    /// Italic flag.
    pub is_italic: bool,
    /// Underline flag.
    pub is_underlined: bool,
    /// Monospace flag.
    pub is_monospace: bool,
    /// Serif flag.
    pub is_serif: bool,
    /// Small-caps flag.
    pub is_smallcaps: bool,
    /// Font size in points.
    pub point_size: u32,
    /// Font identifier; signed.
    ///
    /// NOTE(review): the meaning of the id (and of negative values) is not
    /// visible in this file — confirm against Tesseract's font attribute API.
    pub font_id: i32,
}
195
/// Wrapper around a non-null pointer to C characters.
///
/// Within this file the pointer is read as a NUL-terminated string by
/// `as_c_str` and handed to `c::TessDeleteText` when the value is dropped.
pub struct Text {
    /// Start of the wrapped character buffer; never null.
    pub(crate) ptr: NonNull<c_char>,
}
200
201impl Text {
202 pub fn as_c_str(&self) -> &CStr {
204 unsafe { CStr::from_ptr(self.ptr.as_ptr()) }
205 }
206}
207
208impl Drop for Text {
209 fn drop(&mut self) {
210 unsafe { c::TessDeleteText(self.ptr.as_ptr()) };
211 }
212}
213
214impl AsRef<CStr> for Text {
215 fn as_ref(&self) -> &CStr {
216 self.as_c_str()
217 }
218}
219
220impl core::fmt::Debug for Text {
221 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
222 core::fmt::Debug::fmt(self.as_c_str(), f)
223 }
224}
225
226pub struct Utf8Text(pub(crate) Text);
228
229impl Utf8Text {
230 pub fn as_str(&self) -> &str {
232 unsafe { core::str::from_utf8_unchecked(self.0.as_c_str().to_bytes()) }
233 }
234}
235
236impl AsRef<str> for Utf8Text {
237 fn as_ref(&self) -> &str {
238 self.as_str()
239 }
240}
241
242impl core::fmt::Debug for Utf8Text {
243 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
244 core::fmt::Debug::fmt(self.as_str(), f)
245 }
246}