1use super::*;
4use crate::output::{html, latex, mmd, smiles};
5use std::io::Write;
6
7#[derive(Debug, Clone)]
9pub struct FormatterConfig {
10 pub formats: Vec<OutputFormat>,
12
13 pub pretty: bool,
15
16 pub include_confidence: bool,
18
19 pub include_bbox: bool,
21
22 pub math_delimiters: MathDelimiters,
24
25 pub html_engine: HtmlEngine,
27
28 pub streaming: bool,
30}
31
32impl Default for FormatterConfig {
33 fn default() -> Self {
34 Self {
35 formats: vec![OutputFormat::Text],
36 pretty: true,
37 include_confidence: false,
38 include_bbox: false,
39 math_delimiters: MathDelimiters::default(),
40 html_engine: HtmlEngine::MathJax,
41 streaming: false,
42 }
43 }
44}
45
/// Start/end delimiter strings for inline and display math.
///
/// An instance is cloned into the MMD formatter whenever MMD output is built
/// from line data.
#[derive(Debug, Clone)]
pub struct MathDelimiters {
    /// Delimiter that starts inline math.
    pub inline_start: String,
    /// Delimiter that ends inline math.
    pub inline_end: String,
    /// Delimiter that starts display math.
    pub display_start: String,
    /// Delimiter that ends display math.
    pub display_end: String,
}

impl Default for MathDelimiters {
    /// Uses `$` on both sides of inline math and `$$` on both sides of
    /// display math.
    fn default() -> Self {
        let inline = String::from("$");
        let display = String::from("$$");

        MathDelimiters {
            inline_start: inline.clone(),
            inline_end: inline,
            display_start: display.clone(),
            display_end: display,
        }
    }
}
65
/// Engine selection handed to the HTML formatter when HTML output has to be
/// generated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HtmlEngine {
    /// MathJax; the engine chosen by `FormatterConfig::default`.
    MathJax,
    /// KaTeX.
    KaTeX,
    /// NOTE(review): presumably emits markup without a math-rendering
    /// library — confirm against the HTML formatter.
    Raw,
}
73
74pub struct OutputFormatter {
76 config: FormatterConfig,
77}
78
79impl OutputFormatter {
80 pub fn new() -> Self {
82 Self {
83 config: FormatterConfig::default(),
84 }
85 }
86
87 pub fn with_config(config: FormatterConfig) -> Self {
89 Self { config }
90 }
91
92 pub fn format_result(&self, result: &OcrResult) -> Result<FormatsData, String> {
94 let mut formats = FormatsData::default();
95
96 for format in &self.config.formats {
97 let output = self.format_single(result, *format)?;
98 self.set_format_output(&mut formats, *format, output);
99 }
100
101 Ok(formats)
102 }
103
104 pub fn format_batch(&self, results: &[OcrResult]) -> Result<Vec<FormatsData>, String> {
106 results
107 .iter()
108 .map(|result| self.format_result(result))
109 .collect()
110 }
111
112 pub fn format_stream<W: Write>(
114 &self,
115 results: &[OcrResult],
116 writer: &mut W,
117 format: OutputFormat,
118 ) -> Result<(), String> {
119 for (i, result) in results.iter().enumerate() {
120 let output = self.format_single(result, format)?;
121 writer
122 .write_all(output.as_bytes())
123 .map_err(|e| format!("Write error: {}", e))?;
124
125 if i < results.len() - 1 {
127 writer
128 .write_all(b"\n\n---\n\n")
129 .map_err(|e| format!("Write error: {}", e))?;
130 }
131 }
132
133 Ok(())
134 }
135
136 fn format_single(&self, result: &OcrResult, format: OutputFormat) -> Result<String, String> {
138 match format {
139 OutputFormat::Text => self.format_text(result),
140 OutputFormat::LaTeX => self.format_latex(result, false),
141 OutputFormat::LaTeXStyled => self.format_latex(result, true),
142 OutputFormat::Mmd => self.format_mmd(result),
143 OutputFormat::Html => self.format_html(result),
144 OutputFormat::Smiles => self.format_smiles(result),
145 OutputFormat::Docx => self.format_docx(result),
146 OutputFormat::MathML => self.format_mathml(result),
147 OutputFormat::AsciiMath => self.format_asciimath(result),
148 }
149 }
150
151 fn format_text(&self, result: &OcrResult) -> Result<String, String> {
152 if let Some(text) = &result.formats.text {
153 return Ok(text.clone());
154 }
155
156 if let Some(line_data) = &result.line_data {
158 let text = line_data
159 .iter()
160 .map(|line| line.text.as_str())
161 .collect::<Vec<_>>()
162 .join("\n");
163 return Ok(text);
164 }
165
166 Err("No text content available".to_string())
167 }
168
169 fn format_latex(&self, result: &OcrResult, styled: bool) -> Result<String, String> {
170 let latex_content = if styled {
171 result.formats.latex_styled.as_ref()
172 .or(result.formats.latex_normal.as_ref())
173 } else {
174 result.formats.latex_normal.as_ref()
175 };
176
177 if let Some(latex) = latex_content {
178 if styled {
179 Ok(latex::LaTeXFormatter::new()
181 .with_packages(vec![
182 "amsmath".to_string(),
183 "amssymb".to_string(),
184 "graphicx".to_string(),
185 ])
186 .format_document(latex))
187 } else {
188 Ok(latex.clone())
189 }
190 } else {
191 Err("No LaTeX content available".to_string())
192 }
193 }
194
195 fn format_mmd(&self, result: &OcrResult) -> Result<String, String> {
196 if let Some(mmd) = &result.formats.mmd {
197 return Ok(mmd.clone());
198 }
199
200 if let Some(line_data) = &result.line_data {
202 let formatter = mmd::MmdFormatter::with_delimiters(
203 self.config.math_delimiters.clone()
204 );
205 return Ok(formatter.format(line_data));
206 }
207
208 Err("No MMD content available".to_string())
209 }
210
211 fn format_html(&self, result: &OcrResult) -> Result<String, String> {
212 if let Some(html) = &result.formats.html {
213 return Ok(html.clone());
214 }
215
216 let content = self.format_text(result)?;
218 let formatter = html::HtmlFormatter::new()
219 .with_engine(self.config.html_engine)
220 .with_styling(self.config.pretty);
221
222 Ok(formatter.format(&content, result.line_data.as_deref()))
223 }
224
225 fn format_smiles(&self, result: &OcrResult) -> Result<String, String> {
226 if let Some(smiles) = &result.formats.smiles {
227 return Ok(smiles.clone());
228 }
229
230 let generator = smiles::SmilesGenerator::new();
232 generator.generate_from_result(result)
233 }
234
235 fn format_docx(&self, _result: &OcrResult) -> Result<String, String> {
236 Err("DOCX format requires binary output - use save_docx() instead".to_string())
238 }
239
240 fn format_mathml(&self, result: &OcrResult) -> Result<String, String> {
241 if let Some(mathml) = &result.formats.mathml {
242 return Ok(mathml.clone());
243 }
244
245 Err("MathML generation not yet implemented".to_string())
246 }
247
248 fn format_asciimath(&self, result: &OcrResult) -> Result<String, String> {
249 if let Some(asciimath) = &result.formats.asciimath {
250 return Ok(asciimath.clone());
251 }
252
253 Err("AsciiMath conversion not yet implemented".to_string())
254 }
255
256 fn set_format_output(&self, formats: &mut FormatsData, format: OutputFormat, output: String) {
257 match format {
258 OutputFormat::Text => formats.text = Some(output),
259 OutputFormat::LaTeX => formats.latex_normal = Some(output),
260 OutputFormat::LaTeXStyled => formats.latex_styled = Some(output),
261 OutputFormat::Mmd => formats.mmd = Some(output),
262 OutputFormat::Html => formats.html = Some(output),
263 OutputFormat::Smiles => formats.smiles = Some(output),
264 OutputFormat::MathML => formats.mathml = Some(output),
265 OutputFormat::AsciiMath => formats.asciimath = Some(output),
266 OutputFormat::Docx => {}, }
268 }
269}
270
271impl Default for OutputFormatter {
272 fn default() -> Self {
273 Self::new()
274 }
275}
276
277pub struct FormatterBuilder {
279 config: FormatterConfig,
280}
281
282impl FormatterBuilder {
283 pub fn new() -> Self {
284 Self {
285 config: FormatterConfig::default(),
286 }
287 }
288
289 pub fn formats(mut self, formats: Vec<OutputFormat>) -> Self {
290 self.config.formats = formats;
291 self
292 }
293
294 pub fn add_format(mut self, format: OutputFormat) -> Self {
295 self.config.formats.push(format);
296 self
297 }
298
299 pub fn pretty(mut self, pretty: bool) -> Self {
300 self.config.pretty = pretty;
301 self
302 }
303
304 pub fn include_confidence(mut self, include: bool) -> Self {
305 self.config.include_confidence = include;
306 self
307 }
308
309 pub fn include_bbox(mut self, include: bool) -> Self {
310 self.config.include_bbox = include;
311 self
312 }
313
314 pub fn math_delimiters(mut self, delimiters: MathDelimiters) -> Self {
315 self.config.math_delimiters = delimiters;
316 self
317 }
318
319 pub fn html_engine(mut self, engine: HtmlEngine) -> Self {
320 self.config.html_engine = engine;
321 self
322 }
323
324 pub fn streaming(mut self, streaming: bool) -> Self {
325 self.config.streaming = streaming;
326 self
327 }
328
329 pub fn build(self) -> OutputFormatter {
330 OutputFormatter::with_config(self.config)
331 }
332}
333
334impl Default for FormatterBuilder {
335 fn default() -> Self {
336 Self::new()
337 }
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a result that carries pre-rendered text and normal LaTeX but no
    /// line data.
    fn create_test_result() -> OcrResult {
        OcrResult {
            request_id: "test_123".to_string(),
            version: "3.0".to_string(),
            image_width: 800,
            image_height: 600,
            is_printed: true,
            is_handwritten: false,
            auto_rotate_confidence: 0.95,
            auto_rotate_degrees: 0,
            confidence: 0.98,
            confidence_rate: 0.97,
            formats: FormatsData {
                text: Some("E = mc^2".to_string()),
                latex_normal: Some(r"E = mc^2".to_string()),
                ..Default::default()
            },
            line_data: None,
            error: None,
            metadata: HashMap::new(),
        }
    }

    #[test]
    fn test_format_text() {
        let formatter = OutputFormatter::new();
        let result = create_test_result();

        let output = formatter.format_single(&result, OutputFormat::Text).unwrap();
        assert_eq!(output, "E = mc^2");
    }

    #[test]
    fn test_format_latex() {
        let formatter = OutputFormatter::new();
        let result = create_test_result();

        let output = formatter.format_single(&result, OutputFormat::LaTeX).unwrap();
        assert!(output.contains("mc^2"));
    }

    #[test]
    fn test_builder() {
        // The builder starts from `FormatterConfig::default()`, whose format
        // list already contains `OutputFormat::Text`. Set the list explicitly
        // before appending so the expected count does not silently depend on
        // that default.
        let formatter = FormatterBuilder::new()
            .formats(vec![OutputFormat::Text])
            .add_format(OutputFormat::LaTeX)
            .pretty(true)
            .include_confidence(true)
            .build();

        assert_eq!(formatter.config.formats.len(), 2);
        assert!(formatter.config.pretty);
        assert!(formatter.config.include_confidence);
    }

    #[test]
    fn test_batch_format() {
        let formatter = OutputFormatter::new();
        let results = vec![create_test_result(), create_test_result()];

        let outputs = formatter.format_batch(&results).unwrap();
        assert_eq!(outputs.len(), 2);
        assert_eq!(outputs[0].text.as_deref(), Some("E = mc^2"));
    }

    #[test]
    fn test_format_stream_separates_results() {
        let formatter = OutputFormatter::new();
        let results = vec![create_test_result(), create_test_result()];
        let mut sink: Vec<u8> = Vec::new();

        formatter
            .format_stream(&results, &mut sink, OutputFormat::Text)
            .unwrap();

        // One separator between the two entries, none after the last.
        assert_eq!(
            String::from_utf8(sink).unwrap(),
            "E = mc^2\n\n---\n\nE = mc^2"
        );
    }
}