ruvector_scipix/output/
html.rs

1//! HTML output formatter with math rendering support
2
3use super::{LineData, HtmlEngine};
4
5/// HTML formatter with math rendering
6pub struct HtmlFormatter {
7    engine: HtmlEngine,
8    css_styling: bool,
9    accessibility: bool,
10    responsive: bool,
11    theme: HtmlTheme,
12}
13
/// Color theme applied to the generated document.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HtmlTheme {
    /// Light colors, styled through the `body.theme-light` CSS rule.
    Light,
    /// Dark colors, styled through the `body.theme-dark` CSS rule.
    Dark,
    /// Dark colors only when the `prefers-color-scheme: dark` media query matches.
    Auto,
}
20
21impl HtmlFormatter {
22    pub fn new() -> Self {
23        Self {
24            engine: HtmlEngine::MathJax,
25            css_styling: true,
26            accessibility: true,
27            responsive: true,
28            theme: HtmlTheme::Light,
29        }
30    }
31
32    pub fn with_engine(mut self, engine: HtmlEngine) -> Self {
33        self.engine = engine;
34        self
35    }
36
37    pub fn with_styling(mut self, styling: bool) -> Self {
38        self.css_styling = styling;
39        self
40    }
41
42    pub fn accessibility(mut self, enabled: bool) -> Self {
43        self.accessibility = enabled;
44        self
45    }
46
47    pub fn responsive(mut self, enabled: bool) -> Self {
48        self.responsive = enabled;
49        self
50    }
51
52    pub fn theme(mut self, theme: HtmlTheme) -> Self {
53        self.theme = theme;
54        self
55    }
56
57    /// Format content to HTML
58    pub fn format(&self, content: &str, lines: Option<&[LineData]>) -> String {
59        let mut html = String::new();
60
61        // HTML header with math rendering scripts
62        html.push_str(&self.html_header());
63
64        // Body start with theme class
65        html.push_str("<body");
66        if self.css_styling {
67            html.push_str(&format!(r#" class="theme-{:?}""#, self.theme).to_lowercase());
68        }
69        html.push_str(">\n");
70
71        // Main content container
72        html.push_str(r#"<div class="content">"#);
73        html.push_str("\n");
74
75        // Format content
76        if let Some(line_data) = lines {
77            html.push_str(&self.format_lines(line_data));
78        } else {
79            html.push_str(&self.format_text(content));
80        }
81
82        html.push_str("</div>\n");
83        html.push_str("</body>\n</html>");
84
85        html
86    }
87
88    /// Generate HTML header with scripts and styles
89    fn html_header(&self) -> String {
90        let mut header = String::from("<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n");
91        header.push_str(r#"    <meta charset="UTF-8">"#);
92        header.push_str("\n");
93
94        if self.responsive {
95            header.push_str(r#"    <meta name="viewport" content="width=device-width, initial-scale=1.0">"#);
96            header.push_str("\n");
97        }
98
99        header.push_str("    <title>Mathematical Content</title>\n");
100
101        // Math rendering scripts
102        match self.engine {
103            HtmlEngine::MathJax => {
104                header.push_str(r#"    <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>"#);
105                header.push_str("\n");
106                header.push_str(r#"    <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>"#);
107                header.push_str("\n");
108                header.push_str("    <script>\n");
109                header.push_str("    MathJax = {\n");
110                header.push_str("        tex: {\n");
111                header.push_str(r#"            inlineMath: [['$', '$'], ['\\(', '\\)']],"#);
112                header.push_str("\n");
113                header.push_str(r#"            displayMath: [['$$', '$$'], ['\\[', '\\]']]"#);
114                header.push_str("\n");
115                header.push_str("        }\n");
116                header.push_str("    };\n");
117                header.push_str("    </script>\n");
118            }
119            HtmlEngine::KaTeX => {
120                header.push_str(r#"    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">"#);
121                header.push_str("\n");
122                header.push_str(r#"    <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>"#);
123                header.push_str("\n");
124                header.push_str(r#"    <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js" onload="renderMathInElement(document.body);"></script>"#);
125                header.push_str("\n");
126            }
127            HtmlEngine::Raw => {
128                // No math rendering
129            }
130        }
131
132        // CSS styling
133        if self.css_styling {
134            header.push_str("    <style>\n");
135            header.push_str(&self.generate_css());
136            header.push_str("    </style>\n");
137        }
138
139        header.push_str("</head>\n");
140        header
141    }
142
143    /// Generate CSS styles
144    fn generate_css(&self) -> String {
145        let mut css = String::new();
146
147        css.push_str("        body {\n");
148        css.push_str("            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;\n");
149        css.push_str("            line-height: 1.6;\n");
150        css.push_str("            max-width: 800px;\n");
151        css.push_str("            margin: 0 auto;\n");
152        css.push_str("            padding: 20px;\n");
153        css.push_str("        }\n");
154
155        // Theme colors
156        match self.theme {
157            HtmlTheme::Light => {
158                css.push_str("        body.theme-light {\n");
159                css.push_str("            background-color: #ffffff;\n");
160                css.push_str("            color: #333333;\n");
161                css.push_str("        }\n");
162            }
163            HtmlTheme::Dark => {
164                css.push_str("        body.theme-dark {\n");
165                css.push_str("            background-color: #1e1e1e;\n");
166                css.push_str("            color: #d4d4d4;\n");
167                css.push_str("        }\n");
168            }
169            HtmlTheme::Auto => {
170                css.push_str("        @media (prefers-color-scheme: dark) {\n");
171                css.push_str("            body { background-color: #1e1e1e; color: #d4d4d4; }\n");
172                css.push_str("        }\n");
173            }
174        }
175
176        css.push_str("        .content { padding: 20px; }\n");
177        css.push_str("        .math-display { text-align: center; margin: 20px 0; }\n");
178        css.push_str("        .math-inline { display: inline; }\n");
179        css.push_str("        .equation-block { margin: 15px 0; padding: 10px; background: #f5f5f5; border-radius: 4px; }\n");
180        css.push_str("        table { border-collapse: collapse; width: 100%; margin: 20px 0; }\n");
181        css.push_str("        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }\n");
182        css.push_str("        th { background-color: #f2f2f2; }\n");
183
184        if self.accessibility {
185            css.push_str("        .sr-only { position: absolute; width: 1px; height: 1px; padding: 0; margin: -1px; overflow: hidden; clip: rect(0,0,0,0); border: 0; }\n");
186        }
187
188        css
189    }
190
191    /// Format plain text to HTML
192    fn format_text(&self, text: &str) -> String {
193        let escaped = self.escape_html(text);
194
195        // Convert math delimiters if present
196        let mut html = escaped;
197
198        // Display math $$...$$
199        html = html.replace("$$", "<div class=\"math-display\">$$");
200        html = html.replace("$$", "$$</div>");
201
202        // Inline math $...$
203        // This is simplistic - a real implementation would need proper parsing
204
205        format!("<p>{}</p>", html)
206    }
207
208    /// Format line data to HTML
209    fn format_lines(&self, lines: &[LineData]) -> String {
210        let mut html = String::new();
211
212        for line in lines {
213            match line.line_type.as_str() {
214                "text" => {
215                    html.push_str("<p>");
216                    html.push_str(&self.escape_html(&line.text));
217                    html.push_str("</p>\n");
218                }
219                "math" | "equation" => {
220                    let latex = line.latex.as_ref().unwrap_or(&line.text);
221                    html.push_str(r#"<div class="math-display">"#);
222                    if self.accessibility {
223                        html.push_str(&format!(
224                            r#"<span class="sr-only">Equation: {}</span>"#,
225                            self.escape_html(&line.text)
226                        ));
227                    }
228                    html.push_str(&format!("$${}$$", latex));
229                    html.push_str("</div>\n");
230                }
231                "inline_math" => {
232                    let latex = line.latex.as_ref().unwrap_or(&line.text);
233                    html.push_str(&format!(r#"<span class="math-inline">${}$</span>"#, latex));
234                }
235                "heading" => {
236                    html.push_str(&format!("<h2>{}</h2>\n", self.escape_html(&line.text)));
237                }
238                "table" => {
239                    html.push_str(&self.format_table(&line.text));
240                }
241                "image" => {
242                    html.push_str(&format!(
243                        r#"<img src="{}" alt="Image" loading="lazy">"#,
244                        self.escape_html(&line.text)
245                    ));
246                    html.push_str("\n");
247                }
248                _ => {
249                    html.push_str("<p>");
250                    html.push_str(&self.escape_html(&line.text));
251                    html.push_str("</p>\n");
252                }
253            }
254        }
255
256        html
257    }
258
259    /// Format table to HTML
260    fn format_table(&self, table: &str) -> String {
261        let mut html = String::from("<table>\n");
262
263        let rows: Vec<&str> = table.lines().collect();
264        for (i, row) in rows.iter().enumerate() {
265            html.push_str("  <tr>\n");
266
267            let cells: Vec<&str> = row.split('|')
268                .map(|s| s.trim())
269                .filter(|s| !s.is_empty())
270                .collect();
271
272            let tag = if i == 0 { "th" } else { "td" };
273
274            for cell in cells {
275                html.push_str(&format!("    <{}>{}</{}>\n", tag, self.escape_html(cell), tag));
276            }
277
278            html.push_str("  </tr>\n");
279        }
280
281        html.push_str("</table>\n");
282        html
283    }
284
285    /// Escape HTML special characters
286    fn escape_html(&self, text: &str) -> String {
287        text.replace('&', "&amp;")
288            .replace('<', "&lt;")
289            .replace('>', "&gt;")
290            .replace('"', "&quot;")
291            .replace('\'', "&#39;")
292    }
293}
294
295impl Default for HtmlFormatter {
296    fn default() -> Self {
297        Self::new()
298    }
299}
300
#[cfg(test)]
mod tests {
    use super::*;
    use crate::output::BoundingBox;

    /// The MathJax engine produces a full document head that references MathJax.
    #[test]
    fn test_html_header() {
        let head = HtmlFormatter::new()
            .with_engine(HtmlEngine::MathJax)
            .html_header();

        assert!(head.contains("<!DOCTYPE html>"));
        assert!(head.contains("MathJax"));
    }

    /// The KaTeX engine references the KaTeX assets.
    #[test]
    fn test_katex_header() {
        let head = HtmlFormatter::new()
            .with_engine(HtmlEngine::KaTeX)
            .html_header();

        assert!(head.contains("katex"));
    }

    /// Markup characters are replaced by entities.
    #[test]
    fn test_escape_html() {
        let escaped = HtmlFormatter::new().escape_html("<script>alert('test')</script>");

        assert!(escaped.contains("&lt;"));
        assert!(escaped.contains("&gt;"));
        assert!(!escaped.contains("<script>"));
    }

    /// Text lines become paragraphs and equations become display math.
    #[test]
    fn test_format_lines() {
        let input = [
            LineData {
                line_type: "text".to_string(),
                text: "Introduction".to_string(),
                latex: None,
                bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
                confidence: 0.95,
                words: None,
            },
            LineData {
                line_type: "equation".to_string(),
                text: "E = mc^2".to_string(),
                latex: Some(r"E = mc^2".to_string()),
                bbox: BoundingBox::new(0.0, 25.0, 100.0, 30.0),
                confidence: 0.98,
                words: None,
            },
        ];

        let rendered = HtmlFormatter::new().format_lines(&input);
        assert!(rendered.contains("<p>Introduction</p>"));
        assert!(rendered.contains("math-display"));
        assert!(rendered.contains("$$"));
    }

    /// The dark theme emits its own body rule and background color.
    #[test]
    fn test_dark_theme() {
        let stylesheet = HtmlFormatter::new().theme(HtmlTheme::Dark).generate_css();

        assert!(stylesheet.contains("theme-dark"));
        assert!(stylesheet.contains("#1e1e1e"));
    }

    /// With accessibility enabled, equations carry a screen-reader label.
    #[test]
    fn test_accessibility() {
        let input = [LineData {
            line_type: "equation".to_string(),
            text: "x squared".to_string(),
            latex: Some("x^2".to_string()),
            bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
            confidence: 0.98,
            words: None,
        }];

        let rendered = HtmlFormatter::new().accessibility(true).format_lines(&input);
        assert!(rendered.contains("sr-only"));
        assert!(rendered.contains("Equation:"));
    }
}
388}