Skip to main content

typub_html/serialize/
mod.rs

1//! HTML serialization for typub HTML IR v2.
2//!
3//! Converts semantic `Document`/`Block`/`Inline` structures to HTML.
4
5use scraper::{Html, Selector};
6use std::collections::BTreeMap;
7
8use typub_ir::{
9    AdmonitionKind, Asset, AssetId, AssetRef, AssetSource, Block, BlockAttrs, DefinitionItem,
10    Document, FlowListItem, FootnoteDef, FootnoteId, ImageAttrs, Inline, InlineAttrs, List,
11    ListKind, MathPayload, MathSource, OrderedListMarker, RenderedArtifact, TableCell,
12    TableCellKind, TableHeaderScope, TableSectionKind, TaskListItem, TextAlign, TextStyle,
13    UnknownChild,
14};
15
16mod attrs;
17mod footnotes;
18mod lists;
19mod math;
20#[cfg(test)]
21mod tests;
22
23use attrs::*;
24use footnotes::serialize_footnotes;
25use lists::{serialize_list, serialize_table_cell};
26use math::*;
27
/// Platform-specific switches that adjust how the IR is rendered to HTML.
///
/// All flags are `false` in the derived [`Default`] value.
#[derive(Clone, Debug, Default)]
pub struct SerializeOptions {
    /// When set, `<li>` content is wrapped in `<span style="display:inline;">`.
    pub li_span_wrap: bool,
    /// When set, code blocks emit their pre-highlighted HTML if one is available.
    pub use_code_highlight: bool,
    /// When set, admonitions are wrapped in `<blockquote>` rather than `<div>`.
    pub blockquote_for_admonition: bool,
    /// When set, nested list blocks are emitted as siblings of `<li>` for
    /// editor compatibility.
    pub sibling_nested_lists: bool,
    /// When set, definition lists are flattened into paragraphs.
    pub definition_list_to_paragraph: bool,
}
42
43struct SerializeCtx<'a> {
44    assets: &'a BTreeMap<AssetId, Asset>,
45    options: &'a SerializeOptions,
46}
47
/// Escape `&`, `<` and `>` so that `text` can be embedded as HTML text content.
///
/// Quote characters are passed through unchanged.
pub fn escape_html_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
54
/// Escape `&`, `<`, `>` and `"` so that `text` can be embedded in an HTML
/// attribute value.
///
/// Single quotes are passed through unchanged.
pub fn escape_html_attr(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
62
63/// Extract plain text from inlines.
64pub fn inlines_text(inlines: &[Inline]) -> String {
65    let mut out = String::new();
66    for inline in inlines {
67        match inline {
68            Inline::Text(t) | Inline::Code(t) => out.push_str(t),
69            Inline::SoftBreak | Inline::HardBreak => out.push(' '),
70            Inline::Styled { content, .. } => out.push_str(&inlines_text(content)),
71            Inline::Link { content, .. } => out.push_str(&inlines_text(content)),
72            Inline::Image { alt, .. } => out.push_str(alt),
73            Inline::FootnoteRef(id) => {
74                out.push('[');
75                out.push_str(&id.0);
76                out.push(']');
77            }
78            Inline::MathInline { math, .. } => out.push_str(&math_source_text(&math.src)),
79            Inline::SvgInline { svg, .. } => out.push_str(&math_source_text(&svg.src)),
80            Inline::UnknownInline { content, .. } => out.push_str(&inlines_text(content)),
81            Inline::RawInline { .. } => {}
82        }
83    }
84    out
85}
86
87/// Serialize a full document to HTML using default options.
88pub fn document_to_html(doc: &Document) -> String {
89    document_to_html_with_options(doc, &SerializeOptions::default())
90}
91
92/// Serialize a full document to HTML using custom options.
93pub fn document_to_html_with_options(doc: &Document, options: &SerializeOptions) -> String {
94    let ctx = SerializeCtx {
95        assets: &doc.assets,
96        options,
97    };
98
99    let mut out = String::new();
100    serialize_blocks(&ctx, &doc.blocks, &mut out);
101    serialize_footnotes(&ctx, &doc.footnotes, &mut out);
102    out
103}
104
105/// Serialize inlines to HTML with document asset context and default options.
106pub fn inlines_to_html(inlines: &[Inline], assets: &BTreeMap<AssetId, Asset>) -> String {
107    inlines_to_html_with_options(inlines, assets, &SerializeOptions::default())
108}
109
110/// Serialize inlines to HTML with document asset context and custom options.
111pub fn inlines_to_html_with_options(
112    inlines: &[Inline],
113    assets: &BTreeMap<AssetId, Asset>,
114    options: &SerializeOptions,
115) -> String {
116    let ctx = SerializeCtx { assets, options };
117    serialize_inlines(&ctx, inlines)
118}
119
120fn serialize_blocks(ctx: &SerializeCtx<'_>, blocks: &[Block], out: &mut String) {
121    for block in blocks {
122        serialize_block(ctx, block, out);
123    }
124}
125
126fn serialize_block(ctx: &SerializeCtx<'_>, block: &Block, out: &mut String) {
127    match block {
128        Block::Heading {
129            level,
130            id,
131            content,
132            attrs,
133        } => {
134            let mut extra = Vec::new();
135            if let Some(anchor) = id {
136                extra.push(("id", anchor.0.clone()));
137            }
138            let attr_str = block_attrs_to_html(attrs, &extra, &[]);
139            out.push_str(&format!(
140                "<h{}{}>{}</h{}>\n",
141                level.get(),
142                attr_str,
143                serialize_inlines(ctx, content),
144                level.get()
145            ));
146        }
147        Block::Paragraph { content, attrs } => {
148            let attr_str = block_attrs_to_html(attrs, &[], &[]);
149            out.push_str(&format!(
150                "<p{}>{}</p>\n",
151                attr_str,
152                serialize_inlines(ctx, content)
153            ));
154        }
155        Block::Quote {
156            blocks,
157            cite,
158            attrs,
159        } => {
160            let mut extra = Vec::new();
161            if let Some(url) = cite {
162                extra.push(("cite", url.0.clone()));
163            }
164            let attr_str = block_attrs_to_html(attrs, &extra, &[]);
165            let mut content = String::new();
166            serialize_blocks(ctx, blocks, &mut content);
167            out.push_str(&format!(
168                "<blockquote{}>{}</blockquote>\n",
169                attr_str,
170                content.trim_end()
171            ));
172        }
173        Block::CodeBlock {
174            code,
175            language,
176            filename,
177            highlight_lines,
178            highlighted_html,
179            attrs,
180        } => {
181            let mut extra = Vec::new();
182            if let Some(name) = filename {
183                extra.push(("data-filename", name.clone()));
184            }
185            if !highlight_lines.is_empty() {
186                let lines = highlight_lines
187                    .iter()
188                    .map(u32::to_string)
189                    .collect::<Vec<_>>()
190                    .join(",");
191                extra.push(("data-highlight-lines", lines));
192            }
193            let pre_attr = block_attrs_to_html(attrs, &extra, &[]);
194
195            let mut code_extra = Vec::new();
196            if let Some(lang) = language {
197                code_extra.push(("data-lang", lang.clone()));
198                code_extra.push(("class", format!("hljs language-{}", lang)));
199            }
200            let code_attr = extra_attrs_to_html(&code_extra);
201            let code_content = if ctx.options.use_code_highlight {
202                highlighted_html.as_deref().unwrap_or(code)
203            } else {
204                code
205            };
206            let code_body = if ctx.options.use_code_highlight && highlighted_html.is_some() {
207                code_content.to_string()
208            } else {
209                escape_html_text(code_content)
210            };
211
212            out.push_str(&format!(
213                "<pre{}><code{}>{}</code></pre>\n",
214                pre_attr, code_attr, code_body
215            ));
216        }
217        Block::Divider { attrs } => {
218            let attr_str = block_attrs_to_html(attrs, &[], &[]);
219            out.push_str(&format!("<hr{}>\n", attr_str));
220        }
221        Block::List { list, attrs } => serialize_list(ctx, list, attrs, out),
222        Block::DefinitionList { items, attrs } => serialize_definition_list(ctx, items, attrs, out),
223        Block::Table {
224            caption,
225            sections,
226            attrs,
227        } => {
228            let attr_str = block_attrs_to_html(attrs, &[], &[]);
229            out.push_str(&format!("<table{}>", attr_str));
230
231            if let Some(caption_blocks) = caption {
232                out.push_str("<caption>");
233                serialize_blocks(ctx, caption_blocks, out);
234                out.push_str("</caption>");
235            }
236
237            for section in sections {
238                let section_tag = match section.kind {
239                    TableSectionKind::Head => "thead",
240                    TableSectionKind::Body => "tbody",
241                    TableSectionKind::Foot => "tfoot",
242                };
243                let section_attr = block_attrs_to_html(&section.attrs, &[], &[]);
244                out.push_str(&format!("<{}{}>", section_tag, section_attr));
245                for row in &section.rows {
246                    let row_attr = block_attrs_to_html(&row.attrs, &[], &[]);
247                    out.push_str(&format!("<tr{}>", row_attr));
248                    for cell in &row.cells {
249                        serialize_table_cell(ctx, cell, out);
250                    }
251                    out.push_str("</tr>");
252                }
253                out.push_str(&format!("</{}>", section_tag));
254            }
255
256            out.push_str("</table>\n");
257        }
258        Block::Figure {
259            content,
260            caption,
261            attrs,
262        } => {
263            let attr_str = block_attrs_to_html(attrs, &[], &[]);
264            out.push_str(&format!("<figure{}>", attr_str));
265            serialize_blocks(ctx, content, out);
266            if let Some(caption_blocks) = caption {
267                out.push_str("<figcaption>");
268                serialize_blocks(ctx, caption_blocks, out);
269                out.push_str("</figcaption>");
270            }
271            out.push_str("</figure>\n");
272        }
273        Block::Admonition {
274            kind,
275            title,
276            blocks,
277            attrs,
278        } => {
279            let wrapper_tag = if ctx.options.blockquote_for_admonition {
280                "blockquote"
281            } else {
282                "div"
283            };
284            let mut classes = vec!["admonition".to_string(), admonition_kind_class(kind)];
285            classes.extend(attrs.classes.iter().cloned());
286            let attr_str = attrs_to_html(
287                &classes,
288                attrs.style.as_deref(),
289                &attrs.passthrough,
290                &[],
291                &["class"],
292            );
293
294            out.push_str(&format!("<{}{}>", wrapper_tag, attr_str));
295            if let Some(t) = title {
296                out.push_str(&format!(
297                    "<p class=\"admonition-title\"><strong>{}</strong></p>",
298                    serialize_inlines(ctx, t)
299                ));
300            }
301            serialize_blocks(ctx, blocks, out);
302            out.push_str(&format!("</{}>\n", wrapper_tag));
303        }
304        Block::Details {
305            summary,
306            blocks,
307            open,
308            attrs,
309        } => {
310            let mut extra = Vec::new();
311            if *open {
312                extra.push(("open", "open".to_string()));
313            }
314            let attr_str = block_attrs_to_html(attrs, &extra, &[]);
315            out.push_str(&format!("<details{}>", attr_str));
316            if let Some(sum) = summary {
317                out.push_str(&format!(
318                    "<summary>{}</summary>",
319                    serialize_inlines(ctx, sum)
320                ));
321            }
322            serialize_blocks(ctx, blocks, out);
323            out.push_str("</details>\n");
324        }
325        Block::MathBlock { math, attrs } => {
326            out.push_str(&serialize_math_block(ctx, math, attrs));
327            out.push('\n');
328        }
329        Block::SvgBlock { svg, attrs } => {
330            out.push_str(&serialize_svg_block(ctx, svg, attrs));
331            out.push('\n');
332        }
333        Block::UnknownBlock {
334            tag,
335            attrs,
336            children,
337            data: _,
338            note,
339            source,
340        } => {
341            let mut extra = vec![("data-unknown-block", tag.clone())];
342            if let Some(n) = note {
343                extra.push(("data-unknown-note", n.clone()));
344            }
345            let attr_str = block_attrs_to_html(attrs, &extra, &[]);
346            out.push_str(&format!("<div{}>", attr_str));
347            if let Some(src) = source {
348                out.push_str(&format!(
349                    "<pre data-unknown-source=\"true\">{}</pre>",
350                    escape_html_text(src)
351                ));
352            }
353            serialize_unknown_children(ctx, children, out);
354            out.push_str("</div>\n");
355        }
356        Block::RawBlock {
357            html,
358            origin: _,
359            trust: _,
360            attrs: _,
361        } => {
362            out.push_str(html);
363            if !html.ends_with('\n') {
364                out.push('\n');
365            }
366        }
367    }
368}
369
370fn serialize_unknown_children(ctx: &SerializeCtx<'_>, children: &[UnknownChild], out: &mut String) {
371    for child in children {
372        match child {
373            UnknownChild::Block(block) => serialize_block(ctx, block, out),
374            UnknownChild::Inline(inline) => out.push_str(&serialize_inline(ctx, inline)),
375        }
376    }
377}
378
379fn serialize_definition_list(
380    ctx: &SerializeCtx<'_>,
381    items: &[DefinitionItem],
382    attrs: &BlockAttrs,
383    out: &mut String,
384) {
385    if ctx.options.definition_list_to_paragraph {
386        for item in items {
387            let term_html = item
388                .terms
389                .iter()
390                .map(|blocks| blocks_inline_fallback_html(ctx, blocks))
391                .filter(|s| !s.trim().is_empty())
392                .collect::<Vec<_>>()
393                .join(" / ");
394            let def_html = item
395                .definitions
396                .iter()
397                .map(|blocks| blocks_inline_fallback_html(ctx, blocks))
398                .filter(|s| !s.trim().is_empty())
399                .collect::<Vec<_>>()
400                .join(" ");
401            out.push_str(&format!(
402                "<p><strong>{}</strong>: {}</p>\n",
403                term_html, def_html
404            ));
405        }
406        return;
407    }
408
409    let attr_str = block_attrs_to_html(attrs, &[], &[]);
410    out.push_str(&format!("<dl{}>", attr_str));
411    for item in items {
412        for terms in &item.terms {
413            out.push_str("<dt>");
414            serialize_blocks(ctx, terms, out);
415            out.push_str("</dt>");
416        }
417        for defs in &item.definitions {
418            out.push_str("<dd>");
419            serialize_blocks(ctx, defs, out);
420            out.push_str("</dd>");
421        }
422    }
423    out.push_str("</dl>\n");
424}
425
426fn blocks_inline_fallback_html(ctx: &SerializeCtx<'_>, blocks: &[Block]) -> String {
427    let mut parts = Vec::new();
428    for block in blocks {
429        match block {
430            Block::Paragraph { content, .. } | Block::Heading { content, .. } => {
431                parts.push(serialize_inlines(ctx, content));
432            }
433            Block::CodeBlock { code, .. } => {
434                parts.push(format!("<code>{}</code>", escape_html_text(code)));
435            }
436            Block::MathBlock { math, .. } => {
437                parts.push(escape_html_text(&math_source_text(&math.src)));
438            }
439            Block::SvgBlock { svg, .. } => {
440                parts.push(escape_html_text(&math_source_text(&svg.src)));
441            }
442            _ => parts.push(escape_html_text(&block_inline_fallback_text(block))),
443        }
444    }
445    parts.join(" ")
446}
447
448fn block_inline_fallback_text(block: &Block) -> String {
449    match block {
450        Block::Heading { content, .. } | Block::Paragraph { content, .. } => inlines_text(content),
451        Block::Quote { blocks, .. }
452        | Block::Figure {
453            content: blocks, ..
454        }
455        | Block::Admonition { blocks, .. }
456        | Block::Details { blocks, .. } => blocks
457            .iter()
458            .map(block_inline_fallback_text)
459            .collect::<Vec<_>>()
460            .join(" "),
461        Block::CodeBlock { code, .. } => code.clone(),
462        Block::List { list, .. } => match &list.kind {
463            ListKind::Bullet { items } | ListKind::Numbered { items, .. } => items
464                .iter()
465                .flat_map(|i| i.blocks.iter())
466                .map(block_inline_fallback_text)
467                .collect::<Vec<_>>()
468                .join(" "),
469            ListKind::Task { items } => items
470                .iter()
471                .flat_map(|i| i.blocks.iter())
472                .map(block_inline_fallback_text)
473                .collect::<Vec<_>>()
474                .join(" "),
475            ListKind::Custom { items, .. } => items
476                .iter()
477                .flat_map(|i| i.blocks.iter())
478                .map(block_inline_fallback_text)
479                .collect::<Vec<_>>()
480                .join(" "),
481        },
482        Block::DefinitionList { items, .. } => items
483            .iter()
484            .flat_map(|item| item.terms.iter().chain(item.definitions.iter()))
485            .flat_map(|group| group.iter())
486            .map(block_inline_fallback_text)
487            .collect::<Vec<_>>()
488            .join(" "),
489        Block::Table { sections, .. } => sections
490            .iter()
491            .flat_map(|s| s.rows.iter())
492            .flat_map(|r| r.cells.iter())
493            .flat_map(|c| c.blocks.iter())
494            .map(block_inline_fallback_text)
495            .collect::<Vec<_>>()
496            .join(" "),
497        Block::MathBlock { math, .. } => math_source_text(&math.src),
498        Block::SvgBlock { svg, .. } => math_source_text(&svg.src),
499        Block::UnknownBlock { note, .. } => note.clone().unwrap_or_default(),
500        Block::RawBlock { .. } | Block::Divider { .. } => String::new(),
501    }
502}
503
504fn serialize_inlines(ctx: &SerializeCtx<'_>, inlines: &[Inline]) -> String {
505    let mut out = String::new();
506    for inline in inlines {
507        out.push_str(&serialize_inline(ctx, inline));
508    }
509    out
510}
511
512fn serialize_inline(ctx: &SerializeCtx<'_>, inline: &Inline) -> String {
513    match inline {
514        Inline::Text(text) => escape_html_text(text),
515        Inline::Code(code) => format!("<code>{}</code>", escape_html_text(code)),
516        Inline::SoftBreak => " ".to_string(),
517        Inline::HardBreak => "<br>".to_string(),
518        Inline::Styled {
519            styles,
520            content,
521            attrs,
522        } => serialize_styled_inline(ctx, styles.styles(), content, attrs),
523        Inline::Link {
524            content,
525            href,
526            title,
527            attrs,
528        } => {
529            let mut extra = vec![("href", href.0.clone())];
530            if let Some(t) = title {
531                extra.push(("title", t.clone()));
532            }
533            let attr_str = inline_attrs_to_html(attrs, &extra, &["href", "title"]);
534            format!("<a{}>{}</a>", attr_str, serialize_inlines(ctx, content))
535        }
536        Inline::Image {
537            asset,
538            alt,
539            title,
540            attrs,
541        } => serialize_image_inline(ctx, asset, alt, title.as_deref(), attrs),
542        Inline::FootnoteRef(id) => {
543            let escaped = escape_html_attr(&id.0);
544            format!(
545                "<sup><a href=\"#fn-{}\" id=\"fnref-{}\">[{}]</a></sup>",
546                escaped, escaped, escaped
547            )
548        }
549        Inline::MathInline { math, attrs } => serialize_math_inline(ctx, math, attrs),
550        Inline::SvgInline { svg, attrs } => serialize_svg_inline(ctx, svg, attrs),
551        Inline::UnknownInline {
552            tag,
553            attrs,
554            content,
555            data: _,
556            note,
557            source,
558        } => {
559            let mut extra = vec![("data-unknown-inline", tag.clone())];
560            if let Some(n) = note {
561                extra.push(("data-unknown-note", n.clone()));
562            }
563            let attr_str = inline_attrs_to_html(attrs, &extra, &[]);
564            let mut html = String::new();
565            html.push_str(&format!("<span{}>", attr_str));
566            html.push_str(&serialize_inlines(ctx, content));
567            if let Some(src) = source {
568                html.push_str(&format!(
569                    "<code data-unknown-source=\"true\">{}</code>",
570                    escape_html_text(src)
571                ));
572            }
573            html.push_str("</span>");
574            html
575        }
576        Inline::RawInline {
577            html,
578            origin: _,
579            trust: _,
580            attrs: _,
581        } => html.clone(),
582    }
583}
584
585fn serialize_styled_inline(
586    ctx: &SerializeCtx<'_>,
587    styles: &[TextStyle],
588    content: &[Inline],
589    attrs: &InlineAttrs,
590) -> String {
591    let mut html = serialize_inlines(ctx, content);
592    if attrs != &InlineAttrs::default() {
593        let attr_str = inline_attrs_to_html(attrs, &[], &[]);
594        html = format!("<span{}>{}</span>", attr_str, html);
595    }
596
597    for style in styles.iter().rev() {
598        let (open, close) = text_style_tag(*style);
599        html = format!("<{}>{}</{}>", open, html, close);
600    }
601
602    html
603}
604
605fn text_style_tag(style: TextStyle) -> (&'static str, &'static str) {
606    match style {
607        TextStyle::Bold => ("strong", "strong"),
608        TextStyle::Italic => ("em", "em"),
609        TextStyle::Strikethrough => ("s", "s"),
610        TextStyle::Underline => ("u", "u"),
611        TextStyle::Mark => ("mark", "mark"),
612        TextStyle::Superscript => ("sup", "sup"),
613        TextStyle::Subscript => ("sub", "sub"),
614        TextStyle::Kbd => ("kbd", "kbd"),
615    }
616}
617
618fn serialize_image_inline(
619    ctx: &SerializeCtx<'_>,
620    asset_ref: &AssetRef,
621    alt: &str,
622    title: Option<&str>,
623    attrs: &ImageAttrs,
624) -> String {
625    let mut extra = vec![("alt", alt.to_string())];
626
627    if let Some(src) = resolve_asset_src(asset_ref, ctx.assets) {
628        extra.push(("src", src));
629    } else {
630        extra.push(("src", "".to_string()));
631        extra.push(("data-missing-asset", asset_ref.0.0.clone()));
632    }
633
634    if let Some(t) = title {
635        extra.push(("title", t.to_string()));
636    }
637    if let Some(width) = attrs.width {
638        extra.push(("width", width.to_string()));
639    }
640    if let Some(height) = attrs.height {
641        extra.push(("height", height.to_string()));
642    }
643    if let Some(align) = attrs.align {
644        extra.push(("data-align", text_align_css_value(align).to_string()));
645    }
646
647    let attr_str = attrs_to_html(
648        &[],
649        None,
650        &attrs.passthrough,
651        &extra,
652        &["src", "alt", "title", "width", "height", "data-align"],
653    );
654    format!("<img{}>", attr_str)
655}
656
657fn resolve_asset_src(asset_ref: &AssetRef, assets: &BTreeMap<AssetId, Asset>) -> Option<String> {
658    let asset = assets.get(&asset_ref.0)?;
659    let (source, variants) = match asset {
660        Asset::Image(a) => (&a.source, &a.variants),
661        Asset::Video(a) | Asset::Audio(a) => (&a.source, &a.variants),
662        Asset::File(a) => (&a.source, &a.variants),
663        Asset::Custom(a) => (&a.source, &a.variants),
664    };
665
666    if let Some(url) = variants
667        .iter()
668        .find(|v| v.name == "original")
669        .map(|v| v.publish_url.0.clone())
670        .or_else(|| variants.first().map(|v| v.publish_url.0.clone()))
671    {
672        return Some(url);
673    }
674
675    match source {
676        AssetSource::RemoteUrl { url } => Some(url.0.clone()),
677        AssetSource::DataUri { uri } => Some(uri.clone()),
678        AssetSource::LocalPath { path } => Some(path.as_str().to_string()),
679    }
680}