// hocr_parser/spec_definitions.rs

/// Every hOCR element class name known to this module, as a flat list.
///
/// The entries appear in the same order and grouping as the constants in the
/// `elements` submodules (typesetting, float, logical, inline, engine-specific).
pub const HOCR_ELEMENTS: [&'static str; 39] = [
    // typesetting
    "ocr_page",
    "ocr_column",
    "ocr_carea",
    "ocr_line",
    "ocr_separator",
    "ocr_noise",
    // float
    "ocr_float",
    "ocr_textfloat",
    "ocr_textimage",
    "ocr_image",
    "ocr_linedrawing",
    "ocr_photo",
    "ocr_header",
    "ocr_footer",
    "ocr_pageno",
    "ocr_table",
    // logical
    "ocr_document",
    "ocr_title",
    "ocr_author",
    "ocr_abstract",
    "ocr_part",
    "ocr_chapter",
    "ocr_section",
    "ocr_subsection",
    "ocr_subsubsection",
    "ocr_display",
    "ocr_blockquote",
    "ocr_par",
    "ocr_linear",
    "ocr_caption",
    // inline
    "ocr_glyph",
    "ocr_glyphs",
    "ocr_dropcap",
    "ocr_math",
    "ocr_chem",
    "ocr_cinfo",
    // engine-specific (`ocrx_` prefix)
    "ocrx_block",
    "ocrx_line",
    "ocrx_word",
];
48
/// Every hOCR property name known to this module, as a flat list.
///
/// Names without a prefix come first, followed by the `x_`-prefixed names;
/// each entry has a matching named constant in the `properties` module.
pub const HOCR_PROPERTIES: [&'static str; 21] = [
    // unprefixed property names
    "baseline",
    "bbox",
    "cflow",
    "cuts",
    "hardbreak",
    "image",
    "imagemd5",
    "lpageno",
    "ppageno",
    "nlp",
    "order",
    "poly",
    "scan_res",
    "textangle",
    // `x_`-prefixed property names
    "x_bboxes",
    "x_font",
    "x_fsize",
    "x_confs",
    "x_scanner",
    "x_source",
    "x_wconf",
];
73
/// Named constants for the individual hOCR property names.
///
/// Each constant holds the same string as the corresponding entry of
/// `HOCR_PROPERTIES`, so callers can refer to a property by identifier
/// instead of repeating the string literal.
pub mod properties {
    // unprefixed property names
    pub const BASELINE: &'static str = "baseline";
    pub const BBOX: &'static str = "bbox";
    pub const CFLOW: &'static str = "cflow";
    pub const CUTS: &'static str = "cuts";
    pub const HARDBREAK: &'static str = "hardbreak";
    pub const IMAGE: &'static str = "image";
    pub const IMAGEMD5: &'static str = "imagemd5";
    pub const LPAGENO: &'static str = "lpageno";
    pub const PPAGENO: &'static str = "ppageno";
    pub const NLP: &'static str = "nlp";
    pub const ORDER: &'static str = "order";
    pub const POLY: &'static str = "poly";
    pub const SCAN_RES: &'static str = "scan_res";
    pub const TEXTANGLE: &'static str = "textangle";

    // `x_`-prefixed property names
    pub const X_BBOXES: &'static str = "x_bboxes";
    pub const X_FONT: &'static str = "x_font";
    pub const X_FSIZE: &'static str = "x_fsize";
    pub const X_CONFS: &'static str = "x_confs";
    pub const X_SCANNER: &'static str = "x_scanner";
    pub const X_SOURCE: &'static str = "x_source";
    pub const X_WCONF: &'static str = "x_wconf";
}
98
/// Named constants for the individual hOCR element class names.
///
/// The constants are split into one submodule per category, and every
/// submodule is glob re-exported so that `elements::OCR_PAGE` and
/// `elements::typesetting::OCR_PAGE` both resolve.
pub mod elements {
    pub use float::*;
    pub use inline::*;
    pub use logical::*;
    pub use ocr_engine_specific::*;
    pub use typesetting::*;

    /// Typesetting element names.
    pub mod typesetting {
        pub const OCR_PAGE: &str = "ocr_page";
        pub const OCR_COLUMN: &str = "ocr_column";
        pub const OCR_CAREA: &str = "ocr_carea";
        pub const OCR_LINE: &str = "ocr_line";
        pub const OCR_SEPARATOR: &str = "ocr_separator";
        /// Misspelled alias of [`OCR_SEPARATOR`], kept so existing callers
        /// still compile. It previously held the misspelled string
        /// `"ocr_seperator"`, which did not match the `"ocr_separator"`
        /// entry of `HOCR_ELEMENTS`; it now carries the correct value.
        /// Prefer [`OCR_SEPARATOR`] in new code.
        pub const OCR_SEPERATOR: &str = OCR_SEPARATOR;
        pub const OCR_NOISE: &str = "ocr_noise";
    }

    /// Float element names.
    pub mod float {
        pub const OCR_FLOAT: &str = "ocr_float";
        pub const OCR_TEXTFLOAT: &str = "ocr_textfloat";
        pub const OCR_TEXTIMAGE: &str = "ocr_textimage";
        pub const OCR_IMAGE: &str = "ocr_image";
        pub const OCR_LINEDRAWING: &str = "ocr_linedrawing";
        pub const OCR_PHOTO: &str = "ocr_photo";
        pub const OCR_HEADER: &str = "ocr_header";
        pub const OCR_FOOTER: &str = "ocr_footer";
        pub const OCR_PAGENO: &str = "ocr_pageno";
        pub const OCR_TABLE: &str = "ocr_table";
    }

    /// Logical structure element names.
    pub mod logical {
        pub const OCR_DOCUMENT: &str = "ocr_document";
        pub const OCR_TITLE: &str = "ocr_title";
        pub const OCR_AUTHOR: &str = "ocr_author";
        pub const OCR_ABSTRACT: &str = "ocr_abstract";
        pub const OCR_PART: &str = "ocr_part";
        pub const OCR_CHAPTER: &str = "ocr_chapter";
        pub const OCR_SECTION: &str = "ocr_section";
        pub const OCR_SUBSECTION: &str = "ocr_subsection";
        pub const OCR_SUBSUBSECTION: &str = "ocr_subsubsection";
        pub const OCR_DISPLAY: &str = "ocr_display";
        pub const OCR_BLOCKQUOTE: &str = "ocr_blockquote";
        pub const OCR_PAR: &str = "ocr_par";
        pub const OCR_LINEAR: &str = "ocr_linear";
        pub const OCR_CAPTION: &str = "ocr_caption";
    }

    /// Inline element names.
    pub mod inline {
        pub const OCR_GLYPH: &str = "ocr_glyph";
        pub const OCR_GLYPHS: &str = "ocr_glyphs";
        pub const OCR_DROPCAP: &str = "ocr_dropcap";
        pub const OCR_MATH: &str = "ocr_math";
        pub const OCR_CHEM: &str = "ocr_chem";
        pub const OCR_CINFO: &str = "ocr_cinfo";
    }

    /// Engine-specific (`ocrx_`-prefixed) element names.
    pub mod ocr_engine_specific {
        pub const OCRX_BLOCK: &str = "ocrx_block";
        pub const OCRX_LINE: &str = "ocrx_line";
        pub const OCRX_WORD: &str = "ocrx_word";
    }
}