use serde::{Deserialize, Serialize};
use std::collections::HashMap;
pub mod docx;
pub mod formatter;
pub mod html;
pub mod json;
pub mod latex;
pub mod mmd;
pub mod smiles;
pub use formatter::{HtmlEngine, MathDelimiters, OutputFormatter};
pub use json::ApiResponse;
/// Output representations a recognition result can be rendered as.
///
/// Variants are (de)serialized in snake_case; a variant whose wire name
/// differs from that default carries an explicit `rename`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OutputFormat {
    /// Serialized as `"text"`.
    Text,
    /// Serialized as `"latex_normal"`.
    #[serde(rename = "latex_normal")]
    LaTeX,
    /// Serialized as `"latex_styled"`.
    #[serde(rename = "latex_styled")]
    LaTeXStyled,
    /// Serialized as `"mathml"`.
    #[serde(rename = "mathml")]
    MathML,
    /// Serialized as `"mmd"`.
    #[serde(rename = "mmd")]
    Mmd,
    /// Serialized as `"asciimath"`.
    #[serde(rename = "asciimath")]
    AsciiMath,
    /// Serialized as `"html"`.
    Html,
    /// Serialized as `"smiles"`.
    #[serde(rename = "smiles")]
    Smiles,
    /// Serialized as `"docx"`.
    Docx,
}
impl OutputFormat {
pub fn extension(&self) -> &'static str {
match self {
OutputFormat::Text => "txt",
OutputFormat::LaTeX | OutputFormat::LaTeXStyled => "tex",
OutputFormat::MathML => "xml",
OutputFormat::Mmd => "mmd",
OutputFormat::AsciiMath => "txt",
OutputFormat::Html => "html",
OutputFormat::Smiles => "smi",
OutputFormat::Docx => "docx",
}
}
pub fn mime_type(&self) -> &'static str {
match self {
OutputFormat::Text | OutputFormat::AsciiMath => "text/plain",
OutputFormat::LaTeX | OutputFormat::LaTeXStyled => "application/x-latex",
OutputFormat::MathML => "application/mathml+xml",
OutputFormat::Mmd => "text/markdown",
OutputFormat::Html => "text/html",
OutputFormat::Smiles => "chemical/x-daylight-smiles",
OutputFormat::Docx => {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
}
}
}
}
/// Serializable record describing the outcome of one recognition request.
///
/// NOTE(review): the per-field descriptions are inferred from the field names
/// only; confirm exact semantics (units, ranges) against the producing API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcrResult {
    /// Identifier of the request this result belongs to.
    pub request_id: String,
    /// Version string reported with the result.
    pub version: String,
    /// Width of the processed image (presumably pixels — TODO confirm).
    pub image_width: u32,
    /// Height of the processed image (presumably pixels — TODO confirm).
    pub image_height: u32,
    /// Flag indicating printed content was detected.
    pub is_printed: bool,
    /// Flag indicating handwritten content was detected.
    pub is_handwritten: bool,
    /// Confidence value attached to the auto-rotation estimate.
    pub auto_rotate_confidence: f32,
    /// Auto-rotation estimate, in degrees per the field name.
    pub auto_rotate_degrees: i32,
    /// Overall confidence value.
    pub confidence: f32,
    /// Confidence rate value.
    pub confidence_rate: f32,
    /// Per-format renderings of the recognized content.
    pub formats: FormatsData,
    /// Optional per-line details; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub line_data: Option<Vec<LineData>>,
    /// Optional error message; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Additional key/value pairs, flattened into the same object level as
    /// the named fields when (de)serializing.
    #[serde(flatten)]
    pub metadata: HashMap<String, serde_json::Value>,
}
/// Optional renderings of the recognized content, one slot per format.
///
/// Every slot is independent; a slot that is `None` is left out of the
/// serialized output. `Default` yields a value with every slot empty.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FormatsData {
    /// Rendering stored under the `text` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    /// Rendering stored under the `latex_normal` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub latex_normal: Option<String>,
    /// Rendering stored under the `latex_styled` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub latex_styled: Option<String>,
    /// Rendering stored under the `latex_simplified` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub latex_simplified: Option<String>,
    /// Rendering stored under the `mathml` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mathml: Option<String>,
    /// Rendering stored under the `asciimath` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub asciimath: Option<String>,
    /// Rendering stored under the `mmd` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mmd: Option<String>,
    /// Rendering stored under the `html` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub html: Option<String>,
    /// Rendering stored under the `smiles` key.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub smiles: Option<String>,
}
/// Details for one recognized line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LineData {
    /// Kind of line; (de)serialized under the key `"type"`.
    #[serde(rename = "type")]
    pub line_type: String,
    /// Text content of the line.
    pub text: String,
    /// Optional LaTeX for the line; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub latex: Option<String>,
    /// Rectangle associated with the line.
    pub bbox: BoundingBox,
    /// Confidence value for the line.
    pub confidence: f32,
    /// Optional per-word breakdown; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub words: Option<Vec<WordData>>,
}
/// Details for one recognized word.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WordData {
    /// Text content of the word.
    pub text: String,
    /// Rectangle associated with the word.
    pub bbox: BoundingBox,
    /// Confidence value for the word.
    pub confidence: f32,
    /// Optional LaTeX for the word; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub latex: Option<String>,
}
/// Axis-aligned rectangle described by an origin point and a size.
///
/// NOTE(review): the coordinate system and units are not defined in this
/// file — confirm against the producer.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct BoundingBox {
    /// Horizontal coordinate of the rectangle's origin.
    pub x: f32,
    /// Vertical coordinate of the rectangle's origin.
    pub y: f32,
    /// Extent along the horizontal axis.
    pub width: f32,
    /// Extent along the vertical axis.
    pub height: f32,
}
impl BoundingBox {
    /// Builds a box from its origin (`x`, `y`) and its size (`width`, `height`).
    pub fn new(x: f32, y: f32, width: f32, height: f32) -> Self {
        Self { x, y, width, height }
    }

    /// Returns `width * height`.
    pub fn area(&self) -> f32 {
        self.width * self.height
    }

    /// Returns the midpoint of the box as `(x, y)`: the origin shifted by
    /// half the size along each axis.
    pub fn center(&self) -> (f32, f32) {
        let mid_x = self.x + self.width / 2.0;
        let mid_y = self.y + self.height / 2.0;
        (mid_x, mid_y)
    }
}
/// Converts `content` from one [`OutputFormat`] to another.
///
/// Supported conversions:
/// - identical `from`/`to`: `content` is returned unchanged;
/// - `LaTeX` → `Text`: LaTeX markup is stripped heuristically;
/// - `Mmd` → `LaTeX`: the math fragments are extracted;
/// - `LaTeX` → `Html`: the LaTeX is wrapped as inline math in a standalone
///   HTML page that loads MathJax from a CDN.
///
/// # Errors
///
/// Returns a message naming both formats for every other pair.
pub fn convert_format(
    content: &str,
    from: OutputFormat,
    to: OutputFormat,
) -> Result<String, String> {
    if from == to {
        return Ok(content.to_string());
    }
    match (from, to) {
        (OutputFormat::LaTeX, OutputFormat::Text) => Ok(strip_latex(content)),
        (OutputFormat::Mmd, OutputFormat::LaTeX) => Ok(extract_latex_from_mmd(content)),
        // The LaTeX is escaped before interpolation: `<`, `>` and `&` are
        // common in math and would otherwise be parsed as markup. MathJax
        // reads the decoded text of the page, so it still sees the original
        // characters.
        //
        // The template deliberately omits the `polyfill.io` script found in
        // older MathJax snippets: that third-party domain was reported
        // compromised in 2024, and pages must not load code from it.
        (OutputFormat::LaTeX, OutputFormat::Html) => Ok(format!(
            r#"<!DOCTYPE html>
<html>
<head>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
</head>
<body>
<p>\({}\)</p>
</body>
</html>"#,
            escape_html_text(content)
        )),
        _ => Err(format!(
            "Conversion from {:?} to {:?} not supported",
            from, to
        )),
    }
}

/// Escapes the characters that are significant inside an HTML text node
/// (`&`, `<`, `>`), leaving everything else untouched.
fn escape_html_text(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Heuristically reduces a LaTeX snippet to plain text.
///
/// The rewrites are applied one after another, in the order listed:
/// math delimiters are dropped, the opening of a few text-style commands
/// (`\text{`, `\mathrm{`, `\mathbf{`, `\mathit{`) is dropped, every `}` is
/// dropped, and line-break / spacing commands become a single space. The
/// result is trimmed of leading and trailing whitespace.
///
/// This is not a LaTeX parser: other commands are left as-is, and an opening
/// `{` that does not belong to one of the listed commands stays in the output.
fn strip_latex(content: &str) -> String {
    // (pattern, replacement) pairs; order is significant because each step
    // sees the output of the previous one.
    const REWRITES: &[(&str, &str)] = &[
        ("\\(", ""),
        ("\\)", ""),
        ("\\[", ""),
        ("\\]", ""),
        ("$$", ""),
        ("\\text{", ""),
        ("\\mathrm{", ""),
        ("\\mathbf{", ""),
        ("\\mathit{", ""),
        ("}", ""),
        ("\\\\", " "),
        ("\\,", " "),
        ("\\;", " "),
        ("\\:", " "),
        ("\\!", " "),
        ("\\quad", " "),
        ("\\qquad", " "),
    ];

    let stripped = REWRITES
        .iter()
        .fold(content.to_string(), |acc, &(pattern, replacement)| {
            acc.replace(pattern, replacement)
        });
    stripped.trim().to_string()
}
/// Extracts the math fragments from Markdown-with-math text.
///
/// Both inline (`$…$`) and display (`$$…$$`) math are recognized. Each
/// completed fragment is returned without its delimiters, and fragments are
/// joined with a blank line (`"\n\n"`). Text outside math is discarded.
///
/// Details:
/// - A backslash escapes the following character, so `\$` is never treated
///   as a delimiter. Inside math the escape sequence is kept verbatim.
/// - Inside display math a lone `$` is kept as content; only `$$` closes it.
/// - A fragment that is still open at the end of the input is dropped.
fn extract_latex_from_mmd(content: &str) -> String {
    /// Which delimiter opened the fragment currently being collected.
    #[derive(Clone, Copy, PartialEq)]
    enum Math {
        Inline,
        Display,
    }

    let mut latex_parts: Vec<String> = Vec::new();
    let mut open: Option<Math> = None;
    let mut current = String::new();
    let mut chars = content.chars().peekable();

    // `while let` rather than `for`: the body advances the iterator itself.
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Consume the escaped character together with the backslash so
                // that `\$` (and the second half of `\\`) is never mistaken for
                // a delimiter.
                let escaped = chars.next();
                if open.is_some() {
                    current.push('\\');
                    current.extend(escaped);
                }
            }
            '$' => {
                let doubled = chars.peek() == Some(&'$');
                match open {
                    None => {
                        open = Some(if doubled {
                            chars.next();
                            Math::Display
                        } else {
                            Math::Inline
                        });
                    }
                    // A single `$` closes inline math. The second `$` of a
                    // `$$` pair is left in place so it can open the next
                    // fragment (e.g. `$a$$b$`).
                    Some(Math::Inline) => {
                        latex_parts.push(std::mem::take(&mut current));
                        open = None;
                    }
                    Some(Math::Display) if doubled => {
                        chars.next();
                        latex_parts.push(std::mem::take(&mut current));
                        open = None;
                    }
                    Some(Math::Display) => current.push('$'),
                }
            }
            other => {
                if open.is_some() {
                    current.push(other);
                }
            }
        }
    }

    latex_parts.join("\n\n")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks the extension mapping for a handful of formats.
    #[test]
    fn test_output_format_extension() {
        let expected = [
            (OutputFormat::Text, "txt"),
            (OutputFormat::LaTeX, "tex"),
            (OutputFormat::Html, "html"),
            (OutputFormat::Mmd, "mmd"),
        ];
        for &(format, extension) in expected.iter() {
            assert_eq!(format.extension(), extension);
        }
    }

    /// Spot-checks the MIME type mapping for a handful of formats.
    #[test]
    fn test_output_format_mime_type() {
        let expected = [
            (OutputFormat::Text, "text/plain"),
            (OutputFormat::LaTeX, "application/x-latex"),
            (OutputFormat::Html, "text/html"),
        ];
        for &(format, mime) in expected.iter() {
            assert_eq!(format.mime_type(), mime);
        }
    }

    /// Area and center of a box with exactly representable coordinates.
    #[test]
    fn test_bounding_box() {
        let bbox = BoundingBox::new(10.0, 20.0, 100.0, 50.0);
        let area = bbox.area();
        let center = bbox.center();
        assert_eq!(area, 5000.0);
        assert_eq!(center, (60.0, 45.0));
    }

    /// The wrapped words survive stripping of the text-style commands.
    #[test]
    fn test_strip_latex() {
        let stripped = strip_latex(r"\text{Hello } \mathbf{World}");
        for word in ["Hello", "World"].iter() {
            assert!(stripped.contains(*word));
        }
    }

    /// Converting to the same format returns the input unchanged.
    #[test]
    fn test_convert_same_format() {
        let content = "test content";
        let converted = convert_format(content, OutputFormat::Text, OutputFormat::Text);
        assert_eq!(converted, Ok(content.to_string()));
    }
}