hocr_parser/
spec_definitions.rs

/// Every hOCR element type name, grouped by category.
///
/// Entries appear in this order: typesetting, float, logical, inline, and
/// finally the `ocrx_*` OCR-engine-specific names. Positions are stable, so
/// the table can be indexed as well as searched.
pub const HOCR_ELEMENTS: [&str; 39] = [
    // Typesetting (indices 0..=5)
    "ocr_page", "ocr_column", "ocr_carea", "ocr_line", "ocr_separator", "ocr_noise",
    // Float (indices 6..=15)
    "ocr_float", "ocr_textfloat", "ocr_textimage", "ocr_image", "ocr_linedrawing",
    "ocr_photo", "ocr_header", "ocr_footer", "ocr_pageno", "ocr_table",
    // Logical (indices 16..=29)
    "ocr_document", "ocr_title", "ocr_author", "ocr_abstract", "ocr_part",
    "ocr_chapter", "ocr_section", "ocr_subsection", "ocr_subsubsection",
    "ocr_display", "ocr_blockquote", "ocr_par", "ocr_linear", "ocr_caption",
    // Inline (indices 30..=35)
    "ocr_glyph", "ocr_glyphs", "ocr_dropcap", "ocr_math", "ocr_chem", "ocr_cinfo",
    // OCR engine-specific (indices 36..=38)
    "ocrx_block", "ocrx_line", "ocrx_word",
];
48
/// Every hOCR property name.
///
/// The un-prefixed names come first, followed by the `x_`-prefixed ones.
/// Positions are stable, so the table can be indexed as well as searched.
pub const HOCR_PROPERTIES: [&str; 21] = [
    // Un-prefixed properties (indices 0..=13)
    "baseline", "bbox", "cflow", "cuts", "hardbreak", "image", "imagemd5",
    "lpageno", "ppageno", "nlp", "order", "poly", "scan_res", "textangle",
    // `x_`-prefixed properties (indices 14..=20)
    "x_bboxes", "x_font", "x_fsize", "x_confs", "x_scanner", "x_source", "x_wconf",
];
73
/// Named constants for every hOCR property.
///
/// Each constant holds the property name as a string literal, so callers can
/// rely on editor auto-completion instead of typing the name by hand.
pub mod properties {
    // Un-prefixed properties.

    /// The `baseline` property name.
    pub const BASELINE: &str = "baseline";
    /// The `bbox` property name.
    pub const BBOX: &str = "bbox";
    /// The `cflow` property name.
    pub const CFLOW: &str = "cflow";
    /// The `cuts` property name.
    pub const CUTS: &str = "cuts";
    /// The `hardbreak` property name.
    pub const HARDBREAK: &str = "hardbreak";
    /// The `image` property name.
    pub const IMAGE: &str = "image";
    /// The `imagemd5` property name.
    pub const IMAGEMD5: &str = "imagemd5";
    /// The `lpageno` property name.
    pub const LPAGENO: &str = "lpageno";
    /// The `ppageno` property name.
    pub const PPAGENO: &str = "ppageno";
    /// The `nlp` property name.
    pub const NLP: &str = "nlp";
    /// The `order` property name.
    pub const ORDER: &str = "order";
    /// The `poly` property name.
    pub const POLY: &str = "poly";
    /// The `scan_res` property name.
    pub const SCAN_RES: &str = "scan_res";
    /// The `textangle` property name.
    pub const TEXTANGLE: &str = "textangle";

    // `x_`-prefixed properties.

    /// The `x_bboxes` property name.
    pub const X_BBOXES: &str = "x_bboxes";
    /// The `x_font` property name.
    pub const X_FONT: &str = "x_font";
    /// The `x_fsize` property name.
    pub const X_FSIZE: &str = "x_fsize";
    /// The `x_confs` property name.
    pub const X_CONFS: &str = "x_confs";
    /// The `x_scanner` property name.
    pub const X_SCANNER: &str = "x_scanner";
    /// The `x_source` property name.
    pub const X_SOURCE: &str = "x_source";
    /// The `x_wconf` property name.
    pub const X_WCONF: &str = "x_wconf";
}
98
/// Named constants for every hOCR element type.
///
/// The constants live in one sub-module per category and are all re-exported
/// from this module, so both `elements::OCR_PAGE` and
/// `elements::typesetting::OCR_PAGE` resolve to the same value.
pub mod elements {
    // `self::` keeps these re-exports resolvable under every Rust edition
    // (bare `float::*` is crate-root-relative in the 2015 edition).
    pub use self::float::*;
    pub use self::inline::*;
    pub use self::logical::*;
    pub use self::ocr_engine_specific::*;
    pub use self::typesetting::*;

    /// Typesetting element names.
    pub mod typesetting {
        pub const OCR_PAGE: &str = "ocr_page";
        pub const OCR_COLUMN: &str = "ocr_column";
        pub const OCR_CAREA: &str = "ocr_carea";
        pub const OCR_LINE: &str = "ocr_line";
        pub const OCR_SEPARATOR: &str = "ocr_separator";
        /// Misspelled alias of [`OCR_SEPARATOR`], kept so existing callers
        /// still compile.
        ///
        /// It previously held the misspelled string `"ocr_seperator"`, which
        /// does not match the `"ocr_separator"` entry of the element table;
        /// it now carries the correct value. Prefer [`OCR_SEPARATOR`].
        pub const OCR_SEPERATOR: &str = OCR_SEPARATOR;
        pub const OCR_NOISE: &str = "ocr_noise";
    }

    /// Float element names.
    pub mod float {
        pub const OCR_FLOAT: &str = "ocr_float";
        pub const OCR_TEXTFLOAT: &str = "ocr_textfloat";
        pub const OCR_TEXTIMAGE: &str = "ocr_textimage";
        pub const OCR_IMAGE: &str = "ocr_image";
        pub const OCR_LINEDRAWING: &str = "ocr_linedrawing";
        pub const OCR_PHOTO: &str = "ocr_photo";
        pub const OCR_HEADER: &str = "ocr_header";
        pub const OCR_FOOTER: &str = "ocr_footer";
        pub const OCR_PAGENO: &str = "ocr_pageno";
        pub const OCR_TABLE: &str = "ocr_table";
    }

    /// Logical element names.
    pub mod logical {
        pub const OCR_DOCUMENT: &str = "ocr_document";
        pub const OCR_TITLE: &str = "ocr_title";
        pub const OCR_AUTHOR: &str = "ocr_author";
        pub const OCR_ABSTRACT: &str = "ocr_abstract";
        pub const OCR_PART: &str = "ocr_part";
        pub const OCR_CHAPTER: &str = "ocr_chapter";
        pub const OCR_SECTION: &str = "ocr_section";
        pub const OCR_SUBSECTION: &str = "ocr_subsection";
        pub const OCR_SUBSUBSECTION: &str = "ocr_subsubsection";
        pub const OCR_DISPLAY: &str = "ocr_display";
        pub const OCR_BLOCKQUOTE: &str = "ocr_blockquote";
        pub const OCR_PAR: &str = "ocr_par";
        pub const OCR_LINEAR: &str = "ocr_linear";
        pub const OCR_CAPTION: &str = "ocr_caption";
    }

    /// Inline element names.
    pub mod inline {
        pub const OCR_GLYPH: &str = "ocr_glyph";
        pub const OCR_GLYPHS: &str = "ocr_glyphs";
        pub const OCR_DROPCAP: &str = "ocr_dropcap";
        pub const OCR_MATH: &str = "ocr_math";
        pub const OCR_CHEM: &str = "ocr_chem";
        pub const OCR_CINFO: &str = "ocr_cinfo";
    }

    /// OCR engine-specific (`ocrx_*`) element names.
    pub mod ocr_engine_specific {
        pub const OCRX_BLOCK: &str = "ocrx_block";
        pub const OCRX_LINE: &str = "ocrx_line";
        pub const OCRX_WORD: &str = "ocrx_word";
    }
}