Skip to main content

lex_babel/formats/html/
serializer.rs

//! HTML serialization (Lex → HTML export)
//!
//! Converts Lex documents to semantic HTML5 with embedded CSS.
//! Pipeline: Lex AST → IR → Events → RcDom → HTML string
5
6use crate::common::nested_to_flat::tree_to_events;
7use crate::error::FormatError;
8use crate::formats::html::HtmlTheme;
9use crate::ir::events::Event;
10use crate::ir::nodes::{DocNode, InlineContent, TableCellAlignment};
11use html5ever::{
12    ns, serialize, serialize::SerializeOpts, serialize::TraversalScope, Attribute, LocalName,
13    QualName,
14};
15use lex_core::lex::ast::Document;
16use markup5ever_rcdom::{Handle, Node, NodeData, RcDom, SerializableHandle};
17use std::cell::{Cell, RefCell};
18use std::default::Default;
19use std::rc::Rc;
20
21/// Options for HTML serialization
22#[derive(Debug, Clone, Default)]
23pub struct HtmlOptions {
24    /// CSS theme to use
25    pub theme: HtmlTheme,
26    /// Optional custom CSS to append after the baseline and theme CSS
27    pub custom_css: Option<String>,
28}
29
30impl HtmlOptions {
31    pub fn new(theme: HtmlTheme) -> Self {
32        Self {
33            theme,
34            custom_css: None,
35        }
36    }
37
38    pub fn with_custom_css(mut self, css: String) -> Self {
39        self.custom_css = Some(css);
40        self
41    }
42}
43
44/// Serialize a Lex document to HTML with the given theme
45pub fn serialize_to_html(doc: &Document, theme: HtmlTheme) -> Result<String, FormatError> {
46    serialize_to_html_with_options(doc, HtmlOptions::new(theme))
47}
48
49/// Serialize a Lex document to HTML with full options
50pub fn serialize_to_html_with_options(
51    doc: &Document,
52    options: HtmlOptions,
53) -> Result<String, FormatError> {
54    // Extract document title from root session (before IR conversion loses it)
55    let title = doc.root.title.as_string();
56    let title = if title.is_empty() {
57        "Lex Document".to_string()
58    } else {
59        title.to_string()
60    };
61
62    // Step 1: Lex AST → IR
63    let ir_doc = crate::to_ir(doc);
64
65    // Step 2: IR → Events
66    let events = tree_to_events(&DocNode::Document(ir_doc));
67
68    // Step 3: Events → RcDom (HTML DOM tree)
69    let dom = build_html_dom(&events)?;
70
71    // Step 4: RcDom → HTML string
72    let html_string = serialize_dom(&dom)?;
73
74    // Step 5: Wrap in complete HTML document with CSS
75    let complete_html = wrap_in_document(&html_string, &title, &options)?;
76
77    Ok(complete_html)
78}
79
80/// Build an HTML DOM tree from IR events
81fn build_html_dom(events: &[Event]) -> Result<RcDom, FormatError> {
82    let dom = RcDom::default();
83
84    // Create document container
85    let doc_container = create_element("div", vec![("class", "lex-document")]);
86
87    let mut current_parent: Handle = doc_container.clone();
88    let mut parent_stack: Vec<Handle> = vec![];
89
90    // State for collecting verbatim content
91    let mut in_verbatim = false;
92    let mut verbatim_language: Option<String> = None;
93    let mut verbatim_content = String::new();
94
95    // State for heading context
96    let mut current_heading: Option<Handle> = None;
97
98    for event in events {
99        match event {
100            Event::StartDocument => {
101                // Already created doc_container
102            }
103
104            Event::EndDocument => {
105                // Done
106            }
107
108            Event::StartHeading(level) => {
109                // Create section wrapper for this session
110                let class = format!("lex-session lex-session-{level}");
111                let section = create_element("section", vec![("class", &class)]);
112                current_parent.children.borrow_mut().push(section.clone());
113                parent_stack.push(current_parent.clone());
114                current_parent = section;
115
116                // Create heading element (h1-h6, max at h6)
117                // For levels > 6, add class attribute to preserve true depth
118                let clamped = (*level as u8).min(6);
119                let heading_tag = format!("h{clamped}");
120                let heading = if *level > 6 {
121                    let class = format!("lex-level-{level}");
122                    create_element(&heading_tag, vec![("class", &class)])
123                } else {
124                    create_element(&heading_tag, vec![])
125                };
126                current_parent.children.borrow_mut().push(heading.clone());
127                current_heading = Some(heading);
128            }
129
130            Event::EndHeading(_) => {
131                current_heading = None;
132                // Close section
133                current_parent = parent_stack.pop().ok_or_else(|| {
134                    FormatError::SerializationError("Unbalanced heading end".to_string())
135                })?;
136            }
137
138            Event::StartContent => {
139                // Create content wrapper (mirrors AST container structure for indentation)
140                current_heading = None;
141                let content = create_element("div", vec![("class", "lex-content")]);
142                current_parent.children.borrow_mut().push(content.clone());
143                parent_stack.push(current_parent.clone());
144                current_parent = content;
145            }
146
147            Event::EndContent => {
148                // Close content wrapper
149                current_parent = parent_stack.pop().ok_or_else(|| {
150                    FormatError::SerializationError("Unbalanced content end".to_string())
151                })?;
152            }
153
154            Event::StartParagraph => {
155                current_heading = None;
156                let para = create_element("p", vec![("class", "lex-paragraph")]);
157                current_parent.children.borrow_mut().push(para.clone());
158                parent_stack.push(current_parent.clone());
159                current_parent = para;
160            }
161
162            Event::EndParagraph => {
163                current_parent = parent_stack.pop().ok_or_else(|| {
164                    FormatError::SerializationError("Unbalanced paragraph end".to_string())
165                })?;
166            }
167
168            Event::StartList { ordered, style } => {
169                current_heading = None;
170                let tag = if *ordered { "ol" } else { "ul" };
171                // For ordered lists, set the HTML type attribute to preserve decoration style
172                let list = match style {
173                    crate::ir::nodes::ListStyle::AlphaLower => {
174                        create_element(tag, vec![("class", "lex-list"), ("type", "a")])
175                    }
176                    crate::ir::nodes::ListStyle::AlphaUpper => {
177                        create_element(tag, vec![("class", "lex-list"), ("type", "A")])
178                    }
179                    crate::ir::nodes::ListStyle::RomanLower => {
180                        create_element(tag, vec![("class", "lex-list"), ("type", "i")])
181                    }
182                    crate::ir::nodes::ListStyle::RomanUpper => {
183                        create_element(tag, vec![("class", "lex-list"), ("type", "I")])
184                    }
185                    _ => create_element(tag, vec![("class", "lex-list")]),
186                };
187                current_parent.children.borrow_mut().push(list.clone());
188                parent_stack.push(current_parent.clone());
189                current_parent = list;
190            }
191
192            Event::EndList => {
193                current_parent = parent_stack.pop().ok_or_else(|| {
194                    FormatError::SerializationError("Unbalanced list end".to_string())
195                })?;
196            }
197
198            Event::StartListItem => {
199                current_heading = None;
200                let item = create_element("li", vec![("class", "lex-list-item")]);
201                current_parent.children.borrow_mut().push(item.clone());
202                parent_stack.push(current_parent.clone());
203                current_parent = item;
204            }
205
206            Event::EndListItem => {
207                current_parent = parent_stack.pop().ok_or_else(|| {
208                    FormatError::SerializationError("Unbalanced list item end".to_string())
209                })?;
210            }
211
212            Event::StartVerbatim(language) => {
213                current_heading = None;
214                in_verbatim = true;
215                verbatim_language = language.clone();
216                verbatim_content.clear();
217            }
218
219            Event::EndVerbatim => {
220                // Check for special metadata comment format
221                if let Some(ref lang) = verbatim_language {
222                    if let Some(label) = lang.strip_prefix("lex-metadata:") {
223                        // Render as comment
224                        let comment_text = format!(" lex:{label}{verbatim_content}");
225                        let comment_node = create_comment(&comment_text);
226                        current_parent.children.borrow_mut().push(comment_node);
227
228                        in_verbatim = false;
229                        verbatim_language = None;
230                        verbatim_content.clear();
231                        continue; // Skip normal verbatim handling
232                    }
233                }
234
235                // Create pre + code block
236                let mut attrs = vec![("class", "lex-verbatim")];
237                let lang_string;
238                if let Some(ref lang) = verbatim_language {
239                    lang_string = lang.clone();
240                    attrs.push(("data-language", &lang_string));
241                }
242
243                let pre = create_element("pre", attrs);
244                let code = create_element("code", vec![]);
245                let text = create_text(&verbatim_content);
246                code.children.borrow_mut().push(text);
247                pre.children.borrow_mut().push(code);
248                current_parent.children.borrow_mut().push(pre);
249
250                in_verbatim = false;
251                verbatim_language = None;
252                verbatim_content.clear();
253            }
254
255            Event::StartDefinition => {
256                current_heading = None;
257                let dl = create_element("dl", vec![("class", "lex-definition")]);
258                current_parent.children.borrow_mut().push(dl.clone());
259                parent_stack.push(current_parent.clone());
260                current_parent = dl;
261            }
262
263            Event::EndDefinition => {
264                current_parent = parent_stack.pop().ok_or_else(|| {
265                    FormatError::SerializationError("Unbalanced definition end".to_string())
266                })?;
267            }
268
269            Event::StartDefinitionTerm => {
270                let dt = create_element("dt", vec![]);
271                current_parent.children.borrow_mut().push(dt.clone());
272                parent_stack.push(current_parent.clone());
273                current_parent = dt;
274            }
275
276            Event::EndDefinitionTerm => {
277                current_parent = parent_stack.pop().ok_or_else(|| {
278                    FormatError::SerializationError("Unbalanced definition term end".to_string())
279                })?;
280            }
281
282            Event::StartDefinitionDescription => {
283                let dd = create_element("dd", vec![]);
284                current_parent.children.borrow_mut().push(dd.clone());
285                parent_stack.push(current_parent.clone());
286                current_parent = dd;
287            }
288
289            Event::EndDefinitionDescription => {
290                current_parent = parent_stack.pop().ok_or_else(|| {
291                    FormatError::SerializationError(
292                        "Unbalanced definition description end".to_string(),
293                    )
294                })?;
295            }
296
297            Event::StartTable => {
298                current_heading = None;
299                let table = create_element("table", vec![("class", "lex-table")]);
300                current_parent.children.borrow_mut().push(table.clone());
301                parent_stack.push(current_parent.clone());
302                current_parent = table;
303            }
304
305            Event::EndTable => {
306                current_parent = parent_stack.pop().ok_or_else(|| {
307                    FormatError::SerializationError("Unbalanced table end".to_string())
308                })?;
309            }
310
311            Event::StartTableRow { header: _ } => {
312                let tr = create_element("tr", vec![]);
313                current_parent.children.borrow_mut().push(tr.clone());
314                parent_stack.push(current_parent.clone());
315                current_parent = tr;
316            }
317
318            Event::EndTableRow => {
319                current_parent = parent_stack.pop().ok_or_else(|| {
320                    FormatError::SerializationError("Unbalanced table row end".to_string())
321                })?;
322            }
323
324            Event::StartTableCell { header, align } => {
325                let tag = if *header { "th" } else { "td" };
326                let mut attrs = vec![];
327                match align {
328                    TableCellAlignment::Left => attrs.push(("style", "text-align: left")),
329                    TableCellAlignment::Right => attrs.push(("style", "text-align: right")),
330                    TableCellAlignment::Center => attrs.push(("style", "text-align: center")),
331                    TableCellAlignment::None => {}
332                }
333
334                let cell = create_element(tag, attrs);
335                current_parent.children.borrow_mut().push(cell.clone());
336                parent_stack.push(current_parent.clone());
337                current_parent = cell;
338            }
339
340            Event::EndTableCell => {
341                current_parent = parent_stack.pop().ok_or_else(|| {
342                    FormatError::SerializationError("Unbalanced table cell end".to_string())
343                })?;
344            }
345
346            Event::Inline(inline_content) => {
347                if in_verbatim {
348                    // Accumulate verbatim content
349                    if let InlineContent::Text(text) = inline_content {
350                        verbatim_content.push_str(text);
351                    }
352                } else if let Some(ref heading) = current_heading {
353                    // Add to heading
354                    add_inline_to_node(heading, inline_content)?;
355                } else {
356                    // Add to current parent
357                    add_inline_to_node(&current_parent, inline_content)?;
358                }
359            }
360
361            Event::StartAnnotation { label, parameters } => {
362                current_heading = None;
363                // Create HTML comment
364                let mut comment = format!(" lex:{label}");
365                for (key, value) in parameters {
366                    comment.push_str(&format!(" {key}={value}"));
367                }
368                comment.push(' ');
369                let comment_node = create_comment(&comment);
370                current_parent.children.borrow_mut().push(comment_node);
371            }
372
373            Event::EndAnnotation { label } => {
374                // Closing comment
375                let comment = format!(" /lex:{label} ");
376                let comment_node = create_comment(&comment);
377                current_parent.children.borrow_mut().push(comment_node);
378            }
379
380            Event::Image(image) => {
381                let figure = create_element("figure", vec![("class", "lex-image")]);
382                current_parent.children.borrow_mut().push(figure.clone());
383
384                let mut attrs = vec![("src", image.src.as_str()), ("alt", image.alt.as_str())];
385                if let Some(title) = &image.title {
386                    attrs.push(("title", title.as_str()));
387                }
388                let img = create_element("img", attrs);
389                figure.children.borrow_mut().push(img);
390
391                if !image.alt.is_empty() {
392                    let caption = create_element("figcaption", vec![]);
393                    let text = create_text(&image.alt);
394                    caption.children.borrow_mut().push(text);
395                    figure.children.borrow_mut().push(caption);
396                }
397            }
398
399            Event::Video(video) => {
400                let figure = create_element("figure", vec![("class", "lex-video")]);
401                current_parent.children.borrow_mut().push(figure.clone());
402
403                let mut attrs = vec![("src", video.src.as_str()), ("controls", "")];
404                if let Some(poster) = &video.poster {
405                    attrs.push(("poster", poster.as_str()));
406                }
407                if let Some(title) = &video.title {
408                    attrs.push(("title", title.as_str()));
409                }
410                let vid = create_element("video", attrs);
411                figure.children.borrow_mut().push(vid);
412            }
413
414            Event::Audio(audio) => {
415                let figure = create_element("figure", vec![("class", "lex-audio")]);
416                current_parent.children.borrow_mut().push(figure.clone());
417
418                let mut attrs = vec![("src", audio.src.as_str()), ("controls", "")];
419                if let Some(title) = &audio.title {
420                    attrs.push(("title", title.as_str()));
421                }
422                let aud = create_element("audio", attrs);
423                figure.children.borrow_mut().push(aud);
424            }
425        }
426    }
427
428    // Set the document container as the root
429    dom.document.children.borrow_mut().push(doc_container);
430
431    Ok(dom)
432}
433
434/// Add inline content to an HTML node, handling references → anchors conversion
435fn add_inline_to_node(parent: &Handle, inline: &InlineContent) -> Result<(), FormatError> {
436    match inline {
437        InlineContent::Text(text) => {
438            let text_node = create_text(text);
439            parent.children.borrow_mut().push(text_node);
440        }
441
442        InlineContent::Bold(children) => {
443            let strong = create_element("strong", vec![]);
444            parent.children.borrow_mut().push(strong.clone());
445            for child in children {
446                add_inline_to_node(&strong, child)?;
447            }
448        }
449
450        InlineContent::Italic(children) => {
451            let em = create_element("em", vec![]);
452            parent.children.borrow_mut().push(em.clone());
453            for child in children {
454                add_inline_to_node(&em, child)?;
455            }
456        }
457
458        InlineContent::Code(code_text) => {
459            let code = create_element("code", vec![]);
460            let text = create_text(code_text);
461            code.children.borrow_mut().push(text);
462            parent.children.borrow_mut().push(code);
463        }
464
465        InlineContent::Math(math_text) => {
466            // Math rendered in a span with class
467            let math_span = create_element("span", vec![("class", "lex-math")]);
468            let dollar_open = create_text("$");
469            let math_content = create_text(math_text);
470            let dollar_close = create_text("$");
471            math_span.children.borrow_mut().push(dollar_open);
472            math_span.children.borrow_mut().push(math_content);
473            math_span.children.borrow_mut().push(dollar_close);
474            parent.children.borrow_mut().push(math_span);
475        }
476
477        InlineContent::Reference(ref_text) => {
478            // Convert to anchor
479            // Handle citations (@...) by targeting a reference ID
480            let href = if let Some(citation) = ref_text.strip_prefix('@') {
481                format!("#ref-{citation}")
482            } else {
483                ref_text.to_string()
484            };
485
486            let anchor = create_element("a", vec![("href", &href)]);
487            let anchor_text = create_text(ref_text);
488            anchor.children.borrow_mut().push(anchor_text);
489            parent.children.borrow_mut().push(anchor);
490        }
491
492        InlineContent::Marker(marker) => {
493            let span = create_element("span", vec![("class", "seq_marker")]);
494            let text = create_text(marker);
495            span.children.borrow_mut().push(text);
496            parent.children.borrow_mut().push(span);
497        }
498
499        InlineContent::Image(image) => {
500            let mut attrs = vec![("src", image.src.as_str()), ("alt", image.alt.as_str())];
501            if let Some(title) = &image.title {
502                attrs.push(("title", title.as_str()));
503            }
504            let img = create_element("img", attrs);
505            parent.children.borrow_mut().push(img);
506        }
507    }
508
509    Ok(())
510}
511
512/// Create an HTML element with attributes
513fn create_element(tag: &str, attrs: Vec<(&str, &str)>) -> Handle {
514    let qual_name = QualName::new(None, ns!(html), LocalName::from(tag));
515    let attributes = attrs
516        .into_iter()
517        .map(|(name, value)| Attribute {
518            name: QualName::new(None, ns!(), LocalName::from(name)),
519            value: value.to_string().into(),
520        })
521        .collect();
522
523    Rc::new(Node {
524        parent: Cell::new(None),
525        children: RefCell::new(Vec::new()),
526        data: NodeData::Element {
527            name: qual_name,
528            attrs: RefCell::new(attributes),
529            template_contents: Default::default(),
530            mathml_annotation_xml_integration_point: false,
531        },
532    })
533}
534
535/// Create a text node
536fn create_text(text: &str) -> Handle {
537    Rc::new(Node {
538        parent: Cell::new(None),
539        children: RefCell::new(Vec::new()),
540        data: NodeData::Text {
541            contents: RefCell::new(text.to_string().into()),
542        },
543    })
544}
545
546/// Create a comment node
547fn create_comment(text: &str) -> Handle {
548    Rc::new(Node {
549        parent: Cell::new(None),
550        children: RefCell::new(Vec::new()),
551        data: NodeData::Comment {
552            contents: text.to_string().into(),
553        },
554    })
555}
556
557/// Serialize the DOM to an HTML string (just the inner content)
558fn serialize_dom(dom: &RcDom) -> Result<String, FormatError> {
559    let mut output = Vec::new();
560
561    // Get the document container (first child of document root)
562    let doc_container = dom
563        .document
564        .children
565        .borrow()
566        .first()
567        .ok_or_else(|| FormatError::SerializationError("Empty document".to_string()))?
568        .clone();
569
570    // Serialize each child of the doc_container
571    // Use TraversalScope::IncludeNode to serialize the element AND its children
572    let opts = SerializeOpts {
573        traversal_scope: TraversalScope::IncludeNode,
574        ..Default::default()
575    };
576
577    for child in doc_container.children.borrow().iter() {
578        let serializable = SerializableHandle::from(child.clone());
579        serialize(&mut output, &serializable, opts.clone()).map_err(|e| {
580            FormatError::SerializationError(format!("HTML serialization failed: {e}"))
581        })?;
582    }
583
584    String::from_utf8(output)
585        .map_err(|e| FormatError::SerializationError(format!("UTF-8 conversion failed: {e}")))
586}
587
588/// Wrap the content in a complete HTML document with embedded CSS
589fn wrap_in_document(
590    body_html: &str,
591    title: &str,
592    options: &HtmlOptions,
593) -> Result<String, FormatError> {
594    let baseline_css = include_str!("../../../css/baseline.css");
595    let theme_css = match options.theme {
596        HtmlTheme::FancySerif => include_str!("../../../css/themes/theme-fancy-serif.css"),
597        HtmlTheme::Modern => include_str!("../../../css/themes/theme-modern.css"),
598    };
599
600    // Custom CSS is appended after baseline and theme
601    let custom_css = options.custom_css.as_deref().unwrap_or("");
602
603    // Escape HTML entities in title for safety
604    let escaped_title = html_escape(title);
605
606    let html = format!(
607        r#"<!DOCTYPE html>
608<html lang="en">
609<head>
610  <meta charset="UTF-8">
611  <meta name="viewport" content="width=device-width, initial-scale=1.0">
612  <meta name="generator" content="lex-babel">
613  <title>{escaped_title}</title>
614  <style>
615{baseline_css}
616{theme_css}
617{custom_css}
618  </style>
619</head>
620<body>
621<div class="lex-document">
622{body_html}
623</div>
624</body>
625</html>"#
626    );
627
628    Ok(html)
629}
630
/// Replace the HTML-special characters `&`, `<`, `>` and `"` in `s` with
/// their entity references; every other character is copied unchanged.
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
638
#[cfg(test)]
mod tests {
    use super::*;
    use lex_core::lex::transforms::standard::STRING_TO_AST;

    /// Parse `src` as Lex and render it to a full HTML page with `theme`.
    fn render(src: &str, theme: HtmlTheme) -> String {
        let doc = STRING_TO_AST.run(src.to_string()).unwrap();
        serialize_to_html(&doc, theme).unwrap()
    }

    #[test]
    fn test_simple_paragraph() {
        let html = render("This is a simple paragraph.\n", HtmlTheme::Modern);

        assert!(html.contains("<!DOCTYPE html>"));
        assert!(html.contains("<p class=\"lex-paragraph\">"));
        assert!(html.contains("This is a simple paragraph."));
    }

    #[test]
    fn test_heading() {
        let html = render("1. Introduction\n\n    Content here.\n", HtmlTheme::Modern);

        assert!(html.contains("<section class=\"lex-session lex-session-2\">"));
        assert!(html.contains("<h2>"));
        assert!(html.contains("Introduction"));
    }

    #[test]
    fn test_css_embedded() {
        let html = render("Test document.\n", HtmlTheme::Modern);

        assert!(html.contains("<style>"));
        assert!(html.contains(".lex-document"));
        // The Modern theme's stylesheet names the Helvetica font.
        assert!(html.contains("Helvetica"));
    }

    #[test]
    fn test_fancy_serif_theme() {
        let html = render("Test document.\n", HtmlTheme::FancySerif);

        // The FancySerif theme's stylesheet names the Cormorant font.
        assert!(html.contains("Cormorant"));
    }

    #[test]
    fn test_custom_css_appended() {
        let doc = STRING_TO_AST.run("Test document.\n".to_string()).unwrap();
        let options = HtmlOptions::new(HtmlTheme::Modern)
            .with_custom_css(".my-custom-class { color: red; }".to_string());

        let html = serialize_to_html_with_options(&doc, options).unwrap();

        // The custom rule is present alongside the baseline stylesheet.
        assert!(html.contains(".my-custom-class { color: red; }"));
        assert!(html.contains(".lex-document"));
    }

    #[test]
    fn test_html_options_default() {
        let options = HtmlOptions::default();

        assert_eq!(options.theme, HtmlTheme::Modern);
        assert!(options.custom_css.is_none());
    }
}