1use super::{LineData, HtmlEngine};
4
5pub struct HtmlFormatter {
7 engine: HtmlEngine,
8 css_styling: bool,
9 accessibility: bool,
10 responsive: bool,
11 theme: HtmlTheme,
12}
13
/// Colour theme applied to the generated HTML document.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HtmlTheme {
    /// Light background with dark text.
    Light,
    /// Dark background with light text.
    Dark,
    /// Dark colours only when the browser reports a dark colour-scheme
    /// preference (via a `prefers-color-scheme` media query).
    Auto,
}
20
21impl HtmlFormatter {
22 pub fn new() -> Self {
23 Self {
24 engine: HtmlEngine::MathJax,
25 css_styling: true,
26 accessibility: true,
27 responsive: true,
28 theme: HtmlTheme::Light,
29 }
30 }
31
32 pub fn with_engine(mut self, engine: HtmlEngine) -> Self {
33 self.engine = engine;
34 self
35 }
36
37 pub fn with_styling(mut self, styling: bool) -> Self {
38 self.css_styling = styling;
39 self
40 }
41
42 pub fn accessibility(mut self, enabled: bool) -> Self {
43 self.accessibility = enabled;
44 self
45 }
46
47 pub fn responsive(mut self, enabled: bool) -> Self {
48 self.responsive = enabled;
49 self
50 }
51
52 pub fn theme(mut self, theme: HtmlTheme) -> Self {
53 self.theme = theme;
54 self
55 }
56
57 pub fn format(&self, content: &str, lines: Option<&[LineData]>) -> String {
59 let mut html = String::new();
60
61 html.push_str(&self.html_header());
63
64 html.push_str("<body");
66 if self.css_styling {
67 html.push_str(&format!(r#" class="theme-{:?}""#, self.theme).to_lowercase());
68 }
69 html.push_str(">\n");
70
71 html.push_str(r#"<div class="content">"#);
73 html.push_str("\n");
74
75 if let Some(line_data) = lines {
77 html.push_str(&self.format_lines(line_data));
78 } else {
79 html.push_str(&self.format_text(content));
80 }
81
82 html.push_str("</div>\n");
83 html.push_str("</body>\n</html>");
84
85 html
86 }
87
88 fn html_header(&self) -> String {
90 let mut header = String::from("<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n");
91 header.push_str(r#" <meta charset="UTF-8">"#);
92 header.push_str("\n");
93
94 if self.responsive {
95 header.push_str(r#" <meta name="viewport" content="width=device-width, initial-scale=1.0">"#);
96 header.push_str("\n");
97 }
98
99 header.push_str(" <title>Mathematical Content</title>\n");
100
101 match self.engine {
103 HtmlEngine::MathJax => {
104 header.push_str(r#" <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>"#);
105 header.push_str("\n");
106 header.push_str(r#" <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>"#);
107 header.push_str("\n");
108 header.push_str(" <script>\n");
109 header.push_str(" MathJax = {\n");
110 header.push_str(" tex: {\n");
111 header.push_str(r#" inlineMath: [['$', '$'], ['\\(', '\\)']],"#);
112 header.push_str("\n");
113 header.push_str(r#" displayMath: [['$$', '$$'], ['\\[', '\\]']]"#);
114 header.push_str("\n");
115 header.push_str(" }\n");
116 header.push_str(" };\n");
117 header.push_str(" </script>\n");
118 }
119 HtmlEngine::KaTeX => {
120 header.push_str(r#" <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">"#);
121 header.push_str("\n");
122 header.push_str(r#" <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>"#);
123 header.push_str("\n");
124 header.push_str(r#" <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js" onload="renderMathInElement(document.body);"></script>"#);
125 header.push_str("\n");
126 }
127 HtmlEngine::Raw => {
128 }
130 }
131
132 if self.css_styling {
134 header.push_str(" <style>\n");
135 header.push_str(&self.generate_css());
136 header.push_str(" </style>\n");
137 }
138
139 header.push_str("</head>\n");
140 header
141 }
142
143 fn generate_css(&self) -> String {
145 let mut css = String::new();
146
147 css.push_str(" body {\n");
148 css.push_str(" font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;\n");
149 css.push_str(" line-height: 1.6;\n");
150 css.push_str(" max-width: 800px;\n");
151 css.push_str(" margin: 0 auto;\n");
152 css.push_str(" padding: 20px;\n");
153 css.push_str(" }\n");
154
155 match self.theme {
157 HtmlTheme::Light => {
158 css.push_str(" body.theme-light {\n");
159 css.push_str(" background-color: #ffffff;\n");
160 css.push_str(" color: #333333;\n");
161 css.push_str(" }\n");
162 }
163 HtmlTheme::Dark => {
164 css.push_str(" body.theme-dark {\n");
165 css.push_str(" background-color: #1e1e1e;\n");
166 css.push_str(" color: #d4d4d4;\n");
167 css.push_str(" }\n");
168 }
169 HtmlTheme::Auto => {
170 css.push_str(" @media (prefers-color-scheme: dark) {\n");
171 css.push_str(" body { background-color: #1e1e1e; color: #d4d4d4; }\n");
172 css.push_str(" }\n");
173 }
174 }
175
176 css.push_str(" .content { padding: 20px; }\n");
177 css.push_str(" .math-display { text-align: center; margin: 20px 0; }\n");
178 css.push_str(" .math-inline { display: inline; }\n");
179 css.push_str(" .equation-block { margin: 15px 0; padding: 10px; background: #f5f5f5; border-radius: 4px; }\n");
180 css.push_str(" table { border-collapse: collapse; width: 100%; margin: 20px 0; }\n");
181 css.push_str(" th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }\n");
182 css.push_str(" th { background-color: #f2f2f2; }\n");
183
184 if self.accessibility {
185 css.push_str(" .sr-only { position: absolute; width: 1px; height: 1px; padding: 0; margin: -1px; overflow: hidden; clip: rect(0,0,0,0); border: 0; }\n");
186 }
187
188 css
189 }
190
191 fn format_text(&self, text: &str) -> String {
193 let escaped = self.escape_html(text);
194
195 let mut html = escaped;
197
198 html = html.replace("$$", "<div class=\"math-display\">$$");
200 html = html.replace("$$", "$$</div>");
201
202 format!("<p>{}</p>", html)
206 }
207
208 fn format_lines(&self, lines: &[LineData]) -> String {
210 let mut html = String::new();
211
212 for line in lines {
213 match line.line_type.as_str() {
214 "text" => {
215 html.push_str("<p>");
216 html.push_str(&self.escape_html(&line.text));
217 html.push_str("</p>\n");
218 }
219 "math" | "equation" => {
220 let latex = line.latex.as_ref().unwrap_or(&line.text);
221 html.push_str(r#"<div class="math-display">"#);
222 if self.accessibility {
223 html.push_str(&format!(
224 r#"<span class="sr-only">Equation: {}</span>"#,
225 self.escape_html(&line.text)
226 ));
227 }
228 html.push_str(&format!("$${}$$", latex));
229 html.push_str("</div>\n");
230 }
231 "inline_math" => {
232 let latex = line.latex.as_ref().unwrap_or(&line.text);
233 html.push_str(&format!(r#"<span class="math-inline">${}$</span>"#, latex));
234 }
235 "heading" => {
236 html.push_str(&format!("<h2>{}</h2>\n", self.escape_html(&line.text)));
237 }
238 "table" => {
239 html.push_str(&self.format_table(&line.text));
240 }
241 "image" => {
242 html.push_str(&format!(
243 r#"<img src="{}" alt="Image" loading="lazy">"#,
244 self.escape_html(&line.text)
245 ));
246 html.push_str("\n");
247 }
248 _ => {
249 html.push_str("<p>");
250 html.push_str(&self.escape_html(&line.text));
251 html.push_str("</p>\n");
252 }
253 }
254 }
255
256 html
257 }
258
259 fn format_table(&self, table: &str) -> String {
261 let mut html = String::from("<table>\n");
262
263 let rows: Vec<&str> = table.lines().collect();
264 for (i, row) in rows.iter().enumerate() {
265 html.push_str(" <tr>\n");
266
267 let cells: Vec<&str> = row.split('|')
268 .map(|s| s.trim())
269 .filter(|s| !s.is_empty())
270 .collect();
271
272 let tag = if i == 0 { "th" } else { "td" };
273
274 for cell in cells {
275 html.push_str(&format!(" <{}>{}</{}>\n", tag, self.escape_html(cell), tag));
276 }
277
278 html.push_str(" </tr>\n");
279 }
280
281 html.push_str("</table>\n");
282 html
283 }
284
285 fn escape_html(&self, text: &str) -> String {
287 text.replace('&', "&")
288 .replace('<', "<")
289 .replace('>', ">")
290 .replace('"', """)
291 .replace('\'', "'")
292 }
293}
294
295impl Default for HtmlFormatter {
296 fn default() -> Self {
297 Self::new()
298 }
299}
300
#[cfg(test)]
mod tests {
    use super::*;
    use crate::output::BoundingBox;

    /// Builds a `LineData` fixture; the bounding box and confidence are
    /// arbitrary because none of the assertions below depend on them.
    fn sample_line(line_type: &str, text: &str, latex: Option<&str>) -> LineData {
        LineData {
            line_type: line_type.to_string(),
            text: text.to_string(),
            latex: latex.map(String::from),
            bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
            confidence: 0.95,
            words: None,
        }
    }

    #[test]
    fn test_html_header() {
        let formatter = HtmlFormatter::new().with_engine(HtmlEngine::MathJax);
        let header = formatter.html_header();

        assert!(header.contains("<!DOCTYPE html>"));
        assert!(header.contains("MathJax"));
    }

    #[test]
    fn test_katex_header() {
        let formatter = HtmlFormatter::new().with_engine(HtmlEngine::KaTeX);
        let header = formatter.html_header();

        assert!(header.contains("katex"));
    }

    #[test]
    fn test_escape_html() {
        let formatter = HtmlFormatter::new();
        let result = formatter.escape_html("<script>alert('test')</script>");

        // The angle brackets must be present only as entities.
        assert!(result.contains("&lt;"));
        assert!(result.contains("&gt;"));
        assert!(!result.contains('<'));
        assert!(!result.contains('>'));
        assert!(!result.contains("<script>"));
    }

    #[test]
    fn test_format_lines() {
        let formatter = HtmlFormatter::new();
        let lines = vec![
            sample_line("text", "Introduction", None),
            sample_line("equation", "E = mc^2", Some(r"E = mc^2")),
        ];

        let result = formatter.format_lines(&lines);
        assert!(result.contains("<p>Introduction</p>"));
        assert!(result.contains("math-display"));
        assert!(result.contains("$$"));
    }

    #[test]
    fn test_dark_theme() {
        let formatter = HtmlFormatter::new().theme(HtmlTheme::Dark);
        let css = formatter.generate_css();

        assert!(css.contains("theme-dark"));
        assert!(css.contains("#1e1e1e"));
    }

    #[test]
    fn test_accessibility() {
        let formatter = HtmlFormatter::new().accessibility(true);
        let lines = vec![sample_line("equation", "x squared", Some("x^2"))];

        let result = formatter.format_lines(&lines);
        assert!(result.contains("sr-only"));
        assert!(result.contains("Equation:"));
    }
}