1use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
15pub mod formatter;
16pub mod mmd;
17pub mod latex;
18pub mod html;
19pub mod docx;
20pub mod json;
21pub mod smiles;
22
23pub use formatter::{OutputFormatter, MathDelimiters, HtmlEngine};
24pub use json::ApiResponse;
25
26#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
28#[serde(rename_all = "snake_case")]
29pub enum OutputFormat {
30 Text,
32 #[serde(rename = "latex_normal")]
34 LaTeX,
35 #[serde(rename = "latex_styled")]
37 LaTeXStyled,
38 #[serde(rename = "mathml")]
40 MathML,
41 #[serde(rename = "mmd")]
43 Mmd,
44 #[serde(rename = "asciimath")]
46 AsciiMath,
47 Html,
49 #[serde(rename = "smiles")]
51 Smiles,
52 Docx,
54}
55
56impl OutputFormat {
57 pub fn extension(&self) -> &'static str {
59 match self {
60 OutputFormat::Text => "txt",
61 OutputFormat::LaTeX | OutputFormat::LaTeXStyled => "tex",
62 OutputFormat::MathML => "xml",
63 OutputFormat::Mmd => "mmd",
64 OutputFormat::AsciiMath => "txt",
65 OutputFormat::Html => "html",
66 OutputFormat::Smiles => "smi",
67 OutputFormat::Docx => "docx",
68 }
69 }
70
71 pub fn mime_type(&self) -> &'static str {
73 match self {
74 OutputFormat::Text | OutputFormat::AsciiMath => "text/plain",
75 OutputFormat::LaTeX | OutputFormat::LaTeXStyled => "application/x-latex",
76 OutputFormat::MathML => "application/mathml+xml",
77 OutputFormat::Mmd => "text/markdown",
78 OutputFormat::Html => "text/html",
79 OutputFormat::Smiles => "chemical/x-daylight-smiles",
80 OutputFormat::Docx => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
81 }
82 }
83}
84
85#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct OcrResult {
88 pub request_id: String,
90
91 pub version: String,
93
94 pub image_width: u32,
96 pub image_height: u32,
97
98 pub is_printed: bool,
100 pub is_handwritten: bool,
101 pub auto_rotate_confidence: f32,
102 pub auto_rotate_degrees: i32,
103
104 pub confidence: f32,
106 pub confidence_rate: f32,
107
108 pub formats: FormatsData,
110
111 #[serde(skip_serializing_if = "Option::is_none")]
113 pub line_data: Option<Vec<LineData>>,
114
115 #[serde(skip_serializing_if = "Option::is_none")]
117 pub error: Option<String>,
118
119 #[serde(flatten)]
121 pub metadata: HashMap<String, serde_json::Value>,
122}
123
124#[derive(Debug, Clone, Default, Serialize, Deserialize)]
126pub struct FormatsData {
127 #[serde(skip_serializing_if = "Option::is_none")]
128 pub text: Option<String>,
129
130 #[serde(skip_serializing_if = "Option::is_none")]
131 pub latex_normal: Option<String>,
132
133 #[serde(skip_serializing_if = "Option::is_none")]
134 pub latex_styled: Option<String>,
135
136 #[serde(skip_serializing_if = "Option::is_none")]
137 pub latex_simplified: Option<String>,
138
139 #[serde(skip_serializing_if = "Option::is_none")]
140 pub mathml: Option<String>,
141
142 #[serde(skip_serializing_if = "Option::is_none")]
143 pub asciimath: Option<String>,
144
145 #[serde(skip_serializing_if = "Option::is_none")]
146 pub mmd: Option<String>,
147
148 #[serde(skip_serializing_if = "Option::is_none")]
149 pub html: Option<String>,
150
151 #[serde(skip_serializing_if = "Option::is_none")]
152 pub smiles: Option<String>,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct LineData {
158 #[serde(rename = "type")]
160 pub line_type: String,
161
162 pub text: String,
164
165 #[serde(skip_serializing_if = "Option::is_none")]
166 pub latex: Option<String>,
167
168 pub bbox: BoundingBox,
170
171 pub confidence: f32,
173
174 #[serde(skip_serializing_if = "Option::is_none")]
176 pub words: Option<Vec<WordData>>,
177}
178
179#[derive(Debug, Clone, Serialize, Deserialize)]
181pub struct WordData {
182 pub text: String,
183 pub bbox: BoundingBox,
184 pub confidence: f32,
185
186 #[serde(skip_serializing_if = "Option::is_none")]
187 pub latex: Option<String>,
188}
189
190#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
192pub struct BoundingBox {
193 pub x: f32,
194 pub y: f32,
195 pub width: f32,
196 pub height: f32,
197}
198
199impl BoundingBox {
200 pub fn new(x: f32, y: f32, width: f32, height: f32) -> Self {
201 Self { x, y, width, height }
202 }
203
204 pub fn area(&self) -> f32 {
205 self.width * self.height
206 }
207
208 pub fn center(&self) -> (f32, f32) {
209 (self.x + self.width / 2.0, self.y + self.height / 2.0)
210 }
211}
212
213pub fn convert_format(content: &str, from: OutputFormat, to: OutputFormat) -> Result<String, String> {
215 if from == to {
217 return Ok(content.to_string());
218 }
219
220 match (from, to) {
222 (OutputFormat::LaTeX, OutputFormat::Text) => {
223 Ok(strip_latex(content))
225 }
226 (OutputFormat::Mmd, OutputFormat::LaTeX) => {
227 Ok(extract_latex_from_mmd(content))
229 }
230 (OutputFormat::LaTeX, OutputFormat::Html) => {
231 Ok(format!(
233 r#"<!DOCTYPE html>
234<html>
235<head>
236 <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
237 <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
238</head>
239<body>
240 <p>\({}\)</p>
241</body>
242</html>"#,
243 content
244 ))
245 }
246 _ => Err(format!("Conversion from {:?} to {:?} not supported", from, to)),
247 }
248}
249
/// Reduces a LaTeX fragment to an approximate plain-text rendering.
///
/// Processing happens in this order:
///
/// 1. the math delimiters `\(`, `\)`, `\[`, `\]` and `$$` are removed;
/// 2. `\text{…}`, `\mathrm{…}`, `\mathbf{…}` and `\mathit{…}` are replaced by
///    their argument (see `unwrap_font_commands`);
/// 3. the spacing commands `\\`, `\,`, `\;`, `\:`, `\!`, `\quad` and `\qquad`
///    are each replaced by a single space;
/// 4. leading and trailing whitespace is trimmed.
///
/// Any other markup, such as `\frac{a}{b}`, is passed through unchanged with
/// its braces intact.
fn strip_latex(content: &str) -> String {
    const DELIMITERS: [&str; 5] = ["\\(", "\\)", "\\[", "\\]", "$$"];
    const SPACING: [&str; 7] = ["\\\\", "\\,", "\\;", "\\:", "\\!", "\\quad", "\\qquad"];

    let mut text = content.to_string();
    for delimiter in DELIMITERS.iter() {
        text = text.replace(*delimiter, "");
    }

    let mut text = unwrap_font_commands(&text);
    for command in SPACING.iter() {
        text = text.replace(*command, " ");
    }

    text.trim().to_string()
}

/// Replaces each `\text{…}` / `\mathrm{…}` / `\mathbf{…}` / `\mathit{…}` with its argument.
///
/// Only the closing brace that matches a removed command is dropped. Braces
/// belonging to other groups, unmatched `}`, and the escapes `\{`, `\}` and
/// `\\` are copied through untouched.
fn unwrap_font_commands(input: &str) -> String {
    const WRAPPERS: [&str; 4] = ["\\text{", "\\mathrm{", "\\mathbf{", "\\mathit{"];

    let mut out = String::with_capacity(input.len());
    // One entry per currently open `{`; `true` marks a brace that was opened
    // by a removed wrapper command, so its partner must be removed as well.
    let mut open_groups: Vec<bool> = Vec::new();
    let mut rest = input;

    while let Some(ch) = rest.chars().next() {
        if let Some(tail) = WRAPPERS.iter().find_map(|wrapper| rest.strip_prefix(*wrapper)) {
            open_groups.push(true);
            rest = tail;
            continue;
        }

        let mut consumed = ch.len_utf8();
        match ch {
            '\\' => {
                out.push(ch);
                // An escaped brace or backslash is literal text, not grouping;
                // copy the pair as a unit so it cannot disturb the brace stack
                // or be misread as the start of a wrapper command.
                if let Some(next) = rest[consumed..].chars().next() {
                    if matches!(next, '\\' | '{' | '}') {
                        out.push(next);
                        consumed += next.len_utf8();
                    }
                }
            }
            '{' => {
                open_groups.push(false);
                out.push(ch);
            }
            '}' => {
                let closes_wrapper = open_groups.pop().unwrap_or(false);
                if !closes_wrapper {
                    out.push(ch);
                }
            }
            _ => out.push(ch),
        }
        rest = &rest[consumed..];
    }

    out
}
272
/// Collects the dollar-delimited math segments of a Markdown document.
///
/// Both inline math (`$…$`) and display math (`$$…$$`) are recognised. The
/// contents of each closed segment, without delimiters, are returned in
/// document order, joined by a blank line (`"\n\n"`). Text outside math is
/// discarded, and an empty string is returned when no segment is found.
///
/// Details:
///
/// * a segment is closed only by the same kind of delimiter that opened it; a
///   lone `$` inside display math is kept as content;
/// * a backslash escapes the following character, so `\$` never opens or
///   closes a segment (inside math the pair is kept verbatim);
/// * a segment still open at the end of the input is dropped.
fn extract_latex_from_mmd(content: &str) -> String {
    #[derive(Clone, Copy)]
    enum Mode {
        Prose,
        Inline,
        Display,
    }

    let mut segments: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut mode = Mode::Prose;
    let mut chars = content.chars().peekable();

    while let Some(ch) = chars.next() {
        match (ch, mode) {
            ('\\', _) => {
                // Consume the escaped character together with the backslash so
                // that an escaped `$` is never mistaken for a delimiter.
                let escaped = chars.next();
                if !matches!(mode, Mode::Prose) {
                    current.push(ch);
                    current.extend(escaped);
                }
            }
            ('$', Mode::Prose) => {
                mode = if chars.next_if_eq(&'$').is_some() {
                    Mode::Display
                } else {
                    Mode::Inline
                };
            }
            ('$', Mode::Inline) => {
                segments.push(std::mem::take(&mut current));
                mode = Mode::Prose;
            }
            ('$', Mode::Display) => {
                if chars.next_if_eq(&'$').is_some() {
                    segments.push(std::mem::take(&mut current));
                    mode = Mode::Prose;
                } else {
                    current.push(ch);
                }
            }
            (_, Mode::Prose) => {}
            (_, Mode::Inline) | (_, Mode::Display) => current.push(ch),
        }
    }

    segments.join("\n\n")
}
304
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks the extension reported for a few formats.
    #[test]
    fn test_output_format_extension() {
        let cases = [
            (OutputFormat::Text, "txt"),
            (OutputFormat::LaTeX, "tex"),
            (OutputFormat::Html, "html"),
            (OutputFormat::Mmd, "mmd"),
        ];
        for (format, expected) in cases.iter() {
            assert_eq!(format.extension(), *expected);
        }
    }

    /// Spot-checks the MIME type reported for a few formats.
    #[test]
    fn test_output_format_mime_type() {
        let cases = [
            (OutputFormat::Text, "text/plain"),
            (OutputFormat::LaTeX, "application/x-latex"),
            (OutputFormat::Html, "text/html"),
        ];
        for (format, expected) in cases.iter() {
            assert_eq!(format.mime_type(), *expected);
        }
    }

    /// Area and centre of a 100 x 50 box anchored at (10, 20).
    #[test]
    fn test_bounding_box() {
        let (x, y, width, height) = (10.0, 20.0, 100.0, 50.0);
        let bbox = BoundingBox::new(x, y, width, height);
        assert_eq!(bbox.area(), 5000.0);
        assert_eq!(bbox.center(), (60.0, 45.0));
    }

    /// The wrapped words survive stripping of the font commands.
    #[test]
    fn test_strip_latex() {
        let output = strip_latex(r"\text{Hello } \mathbf{World}");
        for word in ["Hello", "World"].iter() {
            assert!(output.contains(*word));
        }
    }

    /// Converting a format to itself returns the input unchanged.
    #[test]
    fn test_convert_same_format() {
        let content = "test content";
        let converted = convert_format(content, OutputFormat::Text, OutputFormat::Text);
        assert_eq!(converted, Ok(content.to_string()));
    }
}