1use scraper::{Html, Selector};
6use std::collections::BTreeMap;
7
8use typub_ir::{
9 AdmonitionKind, Asset, AssetId, AssetRef, AssetSource, Block, BlockAttrs, DefinitionItem,
10 Document, FlowListItem, FootnoteDef, FootnoteId, ImageAttrs, Inline, InlineAttrs, List,
11 ListKind, MathPayload, MathSource, OrderedListMarker, RenderedArtifact, TableCell,
12 TableCellKind, TableHeaderScope, TableSectionKind, TaskListItem, TextAlign, TextStyle,
13 UnknownChild,
14};
15
16mod attrs;
17mod footnotes;
18mod lists;
19mod math;
20#[cfg(test)]
21mod tests;
22
23use attrs::*;
24use footnotes::serialize_footnotes;
25use lists::{serialize_list, serialize_table_cell};
26use math::*;
27
/// Switches that adjust how the IR is rendered to HTML.
///
/// Every flag is `false` in the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct SerializeOptions {
    /// NOTE(review): not read in this file; presumably consumed by the
    /// `lists` module to wrap list-item content in a `<span>` — confirm.
    pub li_span_wrap: bool,
    /// When set, a code block that carries pre-rendered `highlighted_html`
    /// emits that markup verbatim instead of the escaped raw code.
    pub use_code_highlight: bool,
    /// When set, admonitions are wrapped in `<blockquote>` instead of `<div>`.
    pub blockquote_for_admonition: bool,
    /// NOTE(review): not read in this file; presumably controls how the
    /// `lists` module places nested lists — confirm.
    pub sibling_nested_lists: bool,
    /// When set, each definition-list item is flattened into
    /// `<p><strong>terms</strong>: definitions</p>` instead of `<dl>` markup.
    pub definition_list_to_paragraph: bool,
}
42
/// Read-only state threaded through every serializer function.
struct SerializeCtx<'a> {
    /// Asset table; image references are looked up here to find a `src`.
    assets: &'a BTreeMap<AssetId, Asset>,
    /// Rendering switches chosen by the caller.
    options: &'a SerializeOptions,
}
47
/// Escapes a string for use as HTML element content.
///
/// `&`, `<` and `>` are replaced with `&amp;`, `&lt;` and `&gt;`; every other
/// character is copied unchanged. Quotes are left alone because they carry no
/// special meaning in text content.
pub fn escape_html_text(text: &str) -> String {
    // A single pass means an ampersand produced by one replacement can never
    // be escaped a second time.
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
54
/// Escapes a string for use inside a double-quoted HTML attribute value.
///
/// `&`, `<`, `>` and `"` are replaced with `&amp;`, `&lt;`, `&gt;` and
/// `&quot;`; every other character (including `'`) is copied unchanged.
pub fn escape_html_attr(text: &str) -> String {
    // A single pass means an ampersand produced by one replacement can never
    // be escaped a second time.
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
62
63pub fn inlines_text(inlines: &[Inline]) -> String {
65 let mut out = String::new();
66 for inline in inlines {
67 match inline {
68 Inline::Text(t) | Inline::Code(t) => out.push_str(t),
69 Inline::SoftBreak | Inline::HardBreak => out.push(' '),
70 Inline::Styled { content, .. } => out.push_str(&inlines_text(content)),
71 Inline::Link { content, .. } => out.push_str(&inlines_text(content)),
72 Inline::Image { alt, .. } => out.push_str(alt),
73 Inline::FootnoteRef(id) => {
74 out.push('[');
75 out.push_str(&id.0);
76 out.push(']');
77 }
78 Inline::MathInline { math, .. } => out.push_str(&math_source_text(&math.src)),
79 Inline::SvgInline { svg, .. } => out.push_str(&math_source_text(&svg.src)),
80 Inline::UnknownInline { content, .. } => out.push_str(&inlines_text(content)),
81 Inline::RawInline { .. } => {}
82 }
83 }
84 out
85}
86
87pub fn document_to_html(doc: &Document) -> String {
89 document_to_html_with_options(doc, &SerializeOptions::default())
90}
91
92pub fn document_to_html_with_options(doc: &Document, options: &SerializeOptions) -> String {
94 let ctx = SerializeCtx {
95 assets: &doc.assets,
96 options,
97 };
98
99 let mut out = String::new();
100 serialize_blocks(&ctx, &doc.blocks, &mut out);
101 serialize_footnotes(&ctx, &doc.footnotes, &mut out);
102 out
103}
104
105pub fn inlines_to_html(inlines: &[Inline], assets: &BTreeMap<AssetId, Asset>) -> String {
107 inlines_to_html_with_options(inlines, assets, &SerializeOptions::default())
108}
109
110pub fn inlines_to_html_with_options(
112 inlines: &[Inline],
113 assets: &BTreeMap<AssetId, Asset>,
114 options: &SerializeOptions,
115) -> String {
116 let ctx = SerializeCtx { assets, options };
117 serialize_inlines(&ctx, inlines)
118}
119
120fn serialize_blocks(ctx: &SerializeCtx<'_>, blocks: &[Block], out: &mut String) {
121 for block in blocks {
122 serialize_block(ctx, block, out);
123 }
124}
125
126fn serialize_block(ctx: &SerializeCtx<'_>, block: &Block, out: &mut String) {
127 match block {
128 Block::Heading {
129 level,
130 id,
131 content,
132 attrs,
133 } => {
134 let mut extra = Vec::new();
135 if let Some(anchor) = id {
136 extra.push(("id", anchor.0.clone()));
137 }
138 let attr_str = block_attrs_to_html(attrs, &extra, &[]);
139 out.push_str(&format!(
140 "<h{}{}>{}</h{}>\n",
141 level.get(),
142 attr_str,
143 serialize_inlines(ctx, content),
144 level.get()
145 ));
146 }
147 Block::Paragraph { content, attrs } => {
148 let attr_str = block_attrs_to_html(attrs, &[], &[]);
149 out.push_str(&format!(
150 "<p{}>{}</p>\n",
151 attr_str,
152 serialize_inlines(ctx, content)
153 ));
154 }
155 Block::Quote {
156 blocks,
157 cite,
158 attrs,
159 } => {
160 let mut extra = Vec::new();
161 if let Some(url) = cite {
162 extra.push(("cite", url.0.clone()));
163 }
164 let attr_str = block_attrs_to_html(attrs, &extra, &[]);
165 let mut content = String::new();
166 serialize_blocks(ctx, blocks, &mut content);
167 out.push_str(&format!(
168 "<blockquote{}>{}</blockquote>\n",
169 attr_str,
170 content.trim_end()
171 ));
172 }
173 Block::CodeBlock {
174 code,
175 language,
176 filename,
177 highlight_lines,
178 highlighted_html,
179 attrs,
180 } => {
181 let mut extra = Vec::new();
182 if let Some(name) = filename {
183 extra.push(("data-filename", name.clone()));
184 }
185 if !highlight_lines.is_empty() {
186 let lines = highlight_lines
187 .iter()
188 .map(u32::to_string)
189 .collect::<Vec<_>>()
190 .join(",");
191 extra.push(("data-highlight-lines", lines));
192 }
193 let pre_attr = block_attrs_to_html(attrs, &extra, &[]);
194
195 let mut code_extra = Vec::new();
196 if let Some(lang) = language {
197 code_extra.push(("data-lang", lang.clone()));
198 code_extra.push(("class", format!("hljs language-{}", lang)));
199 }
200 let code_attr = extra_attrs_to_html(&code_extra);
201 let code_content = if ctx.options.use_code_highlight {
202 highlighted_html.as_deref().unwrap_or(code)
203 } else {
204 code
205 };
206 let code_body = if ctx.options.use_code_highlight && highlighted_html.is_some() {
207 code_content.to_string()
208 } else {
209 escape_html_text(code_content)
210 };
211
212 out.push_str(&format!(
213 "<pre{}><code{}>{}</code></pre>\n",
214 pre_attr, code_attr, code_body
215 ));
216 }
217 Block::Divider { attrs } => {
218 let attr_str = block_attrs_to_html(attrs, &[], &[]);
219 out.push_str(&format!("<hr{}>\n", attr_str));
220 }
221 Block::List { list, attrs } => serialize_list(ctx, list, attrs, out),
222 Block::DefinitionList { items, attrs } => serialize_definition_list(ctx, items, attrs, out),
223 Block::Table {
224 caption,
225 sections,
226 attrs,
227 } => {
228 let attr_str = block_attrs_to_html(attrs, &[], &[]);
229 out.push_str(&format!("<table{}>", attr_str));
230
231 if let Some(caption_blocks) = caption {
232 out.push_str("<caption>");
233 serialize_blocks(ctx, caption_blocks, out);
234 out.push_str("</caption>");
235 }
236
237 for section in sections {
238 let section_tag = match section.kind {
239 TableSectionKind::Head => "thead",
240 TableSectionKind::Body => "tbody",
241 TableSectionKind::Foot => "tfoot",
242 };
243 let section_attr = block_attrs_to_html(§ion.attrs, &[], &[]);
244 out.push_str(&format!("<{}{}>", section_tag, section_attr));
245 for row in §ion.rows {
246 let row_attr = block_attrs_to_html(&row.attrs, &[], &[]);
247 out.push_str(&format!("<tr{}>", row_attr));
248 for cell in &row.cells {
249 serialize_table_cell(ctx, cell, out);
250 }
251 out.push_str("</tr>");
252 }
253 out.push_str(&format!("</{}>", section_tag));
254 }
255
256 out.push_str("</table>\n");
257 }
258 Block::Figure {
259 content,
260 caption,
261 attrs,
262 } => {
263 let attr_str = block_attrs_to_html(attrs, &[], &[]);
264 out.push_str(&format!("<figure{}>", attr_str));
265 serialize_blocks(ctx, content, out);
266 if let Some(caption_blocks) = caption {
267 out.push_str("<figcaption>");
268 serialize_blocks(ctx, caption_blocks, out);
269 out.push_str("</figcaption>");
270 }
271 out.push_str("</figure>\n");
272 }
273 Block::Admonition {
274 kind,
275 title,
276 blocks,
277 attrs,
278 } => {
279 let wrapper_tag = if ctx.options.blockquote_for_admonition {
280 "blockquote"
281 } else {
282 "div"
283 };
284 let mut classes = vec!["admonition".to_string(), admonition_kind_class(kind)];
285 classes.extend(attrs.classes.iter().cloned());
286 let attr_str = attrs_to_html(
287 &classes,
288 attrs.style.as_deref(),
289 &attrs.passthrough,
290 &[],
291 &["class"],
292 );
293
294 out.push_str(&format!("<{}{}>", wrapper_tag, attr_str));
295 if let Some(t) = title {
296 out.push_str(&format!(
297 "<p class=\"admonition-title\"><strong>{}</strong></p>",
298 serialize_inlines(ctx, t)
299 ));
300 }
301 serialize_blocks(ctx, blocks, out);
302 out.push_str(&format!("</{}>\n", wrapper_tag));
303 }
304 Block::Details {
305 summary,
306 blocks,
307 open,
308 attrs,
309 } => {
310 let mut extra = Vec::new();
311 if *open {
312 extra.push(("open", "open".to_string()));
313 }
314 let attr_str = block_attrs_to_html(attrs, &extra, &[]);
315 out.push_str(&format!("<details{}>", attr_str));
316 if let Some(sum) = summary {
317 out.push_str(&format!(
318 "<summary>{}</summary>",
319 serialize_inlines(ctx, sum)
320 ));
321 }
322 serialize_blocks(ctx, blocks, out);
323 out.push_str("</details>\n");
324 }
325 Block::MathBlock { math, attrs } => {
326 out.push_str(&serialize_math_block(ctx, math, attrs));
327 out.push('\n');
328 }
329 Block::SvgBlock { svg, attrs } => {
330 out.push_str(&serialize_svg_block(ctx, svg, attrs));
331 out.push('\n');
332 }
333 Block::UnknownBlock {
334 tag,
335 attrs,
336 children,
337 data: _,
338 note,
339 source,
340 } => {
341 let mut extra = vec![("data-unknown-block", tag.clone())];
342 if let Some(n) = note {
343 extra.push(("data-unknown-note", n.clone()));
344 }
345 let attr_str = block_attrs_to_html(attrs, &extra, &[]);
346 out.push_str(&format!("<div{}>", attr_str));
347 if let Some(src) = source {
348 out.push_str(&format!(
349 "<pre data-unknown-source=\"true\">{}</pre>",
350 escape_html_text(src)
351 ));
352 }
353 serialize_unknown_children(ctx, children, out);
354 out.push_str("</div>\n");
355 }
356 Block::RawBlock {
357 html,
358 origin: _,
359 trust: _,
360 attrs: _,
361 } => {
362 out.push_str(html);
363 if !html.ends_with('\n') {
364 out.push('\n');
365 }
366 }
367 }
368}
369
370fn serialize_unknown_children(ctx: &SerializeCtx<'_>, children: &[UnknownChild], out: &mut String) {
371 for child in children {
372 match child {
373 UnknownChild::Block(block) => serialize_block(ctx, block, out),
374 UnknownChild::Inline(inline) => out.push_str(&serialize_inline(ctx, inline)),
375 }
376 }
377}
378
379fn serialize_definition_list(
380 ctx: &SerializeCtx<'_>,
381 items: &[DefinitionItem],
382 attrs: &BlockAttrs,
383 out: &mut String,
384) {
385 if ctx.options.definition_list_to_paragraph {
386 for item in items {
387 let term_html = item
388 .terms
389 .iter()
390 .map(|blocks| blocks_inline_fallback_html(ctx, blocks))
391 .filter(|s| !s.trim().is_empty())
392 .collect::<Vec<_>>()
393 .join(" / ");
394 let def_html = item
395 .definitions
396 .iter()
397 .map(|blocks| blocks_inline_fallback_html(ctx, blocks))
398 .filter(|s| !s.trim().is_empty())
399 .collect::<Vec<_>>()
400 .join(" ");
401 out.push_str(&format!(
402 "<p><strong>{}</strong>: {}</p>\n",
403 term_html, def_html
404 ));
405 }
406 return;
407 }
408
409 let attr_str = block_attrs_to_html(attrs, &[], &[]);
410 out.push_str(&format!("<dl{}>", attr_str));
411 for item in items {
412 for terms in &item.terms {
413 out.push_str("<dt>");
414 serialize_blocks(ctx, terms, out);
415 out.push_str("</dt>");
416 }
417 for defs in &item.definitions {
418 out.push_str("<dd>");
419 serialize_blocks(ctx, defs, out);
420 out.push_str("</dd>");
421 }
422 }
423 out.push_str("</dl>\n");
424}
425
426fn blocks_inline_fallback_html(ctx: &SerializeCtx<'_>, blocks: &[Block]) -> String {
427 let mut parts = Vec::new();
428 for block in blocks {
429 match block {
430 Block::Paragraph { content, .. } | Block::Heading { content, .. } => {
431 parts.push(serialize_inlines(ctx, content));
432 }
433 Block::CodeBlock { code, .. } => {
434 parts.push(format!("<code>{}</code>", escape_html_text(code)));
435 }
436 Block::MathBlock { math, .. } => {
437 parts.push(escape_html_text(&math_source_text(&math.src)));
438 }
439 Block::SvgBlock { svg, .. } => {
440 parts.push(escape_html_text(&math_source_text(&svg.src)));
441 }
442 _ => parts.push(escape_html_text(&block_inline_fallback_text(block))),
443 }
444 }
445 parts.join(" ")
446}
447
448fn block_inline_fallback_text(block: &Block) -> String {
449 match block {
450 Block::Heading { content, .. } | Block::Paragraph { content, .. } => inlines_text(content),
451 Block::Quote { blocks, .. }
452 | Block::Figure {
453 content: blocks, ..
454 }
455 | Block::Admonition { blocks, .. }
456 | Block::Details { blocks, .. } => blocks
457 .iter()
458 .map(block_inline_fallback_text)
459 .collect::<Vec<_>>()
460 .join(" "),
461 Block::CodeBlock { code, .. } => code.clone(),
462 Block::List { list, .. } => match &list.kind {
463 ListKind::Bullet { items } | ListKind::Numbered { items, .. } => items
464 .iter()
465 .flat_map(|i| i.blocks.iter())
466 .map(block_inline_fallback_text)
467 .collect::<Vec<_>>()
468 .join(" "),
469 ListKind::Task { items } => items
470 .iter()
471 .flat_map(|i| i.blocks.iter())
472 .map(block_inline_fallback_text)
473 .collect::<Vec<_>>()
474 .join(" "),
475 ListKind::Custom { items, .. } => items
476 .iter()
477 .flat_map(|i| i.blocks.iter())
478 .map(block_inline_fallback_text)
479 .collect::<Vec<_>>()
480 .join(" "),
481 },
482 Block::DefinitionList { items, .. } => items
483 .iter()
484 .flat_map(|item| item.terms.iter().chain(item.definitions.iter()))
485 .flat_map(|group| group.iter())
486 .map(block_inline_fallback_text)
487 .collect::<Vec<_>>()
488 .join(" "),
489 Block::Table { sections, .. } => sections
490 .iter()
491 .flat_map(|s| s.rows.iter())
492 .flat_map(|r| r.cells.iter())
493 .flat_map(|c| c.blocks.iter())
494 .map(block_inline_fallback_text)
495 .collect::<Vec<_>>()
496 .join(" "),
497 Block::MathBlock { math, .. } => math_source_text(&math.src),
498 Block::SvgBlock { svg, .. } => math_source_text(&svg.src),
499 Block::UnknownBlock { note, .. } => note.clone().unwrap_or_default(),
500 Block::RawBlock { .. } | Block::Divider { .. } => String::new(),
501 }
502}
503
504fn serialize_inlines(ctx: &SerializeCtx<'_>, inlines: &[Inline]) -> String {
505 let mut out = String::new();
506 for inline in inlines {
507 out.push_str(&serialize_inline(ctx, inline));
508 }
509 out
510}
511
512fn serialize_inline(ctx: &SerializeCtx<'_>, inline: &Inline) -> String {
513 match inline {
514 Inline::Text(text) => escape_html_text(text),
515 Inline::Code(code) => format!("<code>{}</code>", escape_html_text(code)),
516 Inline::SoftBreak => " ".to_string(),
517 Inline::HardBreak => "<br>".to_string(),
518 Inline::Styled {
519 styles,
520 content,
521 attrs,
522 } => serialize_styled_inline(ctx, styles.styles(), content, attrs),
523 Inline::Link {
524 content,
525 href,
526 title,
527 attrs,
528 } => {
529 let mut extra = vec![("href", href.0.clone())];
530 if let Some(t) = title {
531 extra.push(("title", t.clone()));
532 }
533 let attr_str = inline_attrs_to_html(attrs, &extra, &["href", "title"]);
534 format!("<a{}>{}</a>", attr_str, serialize_inlines(ctx, content))
535 }
536 Inline::Image {
537 asset,
538 alt,
539 title,
540 attrs,
541 } => serialize_image_inline(ctx, asset, alt, title.as_deref(), attrs),
542 Inline::FootnoteRef(id) => {
543 let escaped = escape_html_attr(&id.0);
544 format!(
545 "<sup><a href=\"#fn-{}\" id=\"fnref-{}\">[{}]</a></sup>",
546 escaped, escaped, escaped
547 )
548 }
549 Inline::MathInline { math, attrs } => serialize_math_inline(ctx, math, attrs),
550 Inline::SvgInline { svg, attrs } => serialize_svg_inline(ctx, svg, attrs),
551 Inline::UnknownInline {
552 tag,
553 attrs,
554 content,
555 data: _,
556 note,
557 source,
558 } => {
559 let mut extra = vec![("data-unknown-inline", tag.clone())];
560 if let Some(n) = note {
561 extra.push(("data-unknown-note", n.clone()));
562 }
563 let attr_str = inline_attrs_to_html(attrs, &extra, &[]);
564 let mut html = String::new();
565 html.push_str(&format!("<span{}>", attr_str));
566 html.push_str(&serialize_inlines(ctx, content));
567 if let Some(src) = source {
568 html.push_str(&format!(
569 "<code data-unknown-source=\"true\">{}</code>",
570 escape_html_text(src)
571 ));
572 }
573 html.push_str("</span>");
574 html
575 }
576 Inline::RawInline {
577 html,
578 origin: _,
579 trust: _,
580 attrs: _,
581 } => html.clone(),
582 }
583}
584
585fn serialize_styled_inline(
586 ctx: &SerializeCtx<'_>,
587 styles: &[TextStyle],
588 content: &[Inline],
589 attrs: &InlineAttrs,
590) -> String {
591 let mut html = serialize_inlines(ctx, content);
592 if attrs != &InlineAttrs::default() {
593 let attr_str = inline_attrs_to_html(attrs, &[], &[]);
594 html = format!("<span{}>{}</span>", attr_str, html);
595 }
596
597 for style in styles.iter().rev() {
598 let (open, close) = text_style_tag(*style);
599 html = format!("<{}>{}</{}>", open, html, close);
600 }
601
602 html
603}
604
605fn text_style_tag(style: TextStyle) -> (&'static str, &'static str) {
606 match style {
607 TextStyle::Bold => ("strong", "strong"),
608 TextStyle::Italic => ("em", "em"),
609 TextStyle::Strikethrough => ("s", "s"),
610 TextStyle::Underline => ("u", "u"),
611 TextStyle::Mark => ("mark", "mark"),
612 TextStyle::Superscript => ("sup", "sup"),
613 TextStyle::Subscript => ("sub", "sub"),
614 TextStyle::Kbd => ("kbd", "kbd"),
615 }
616}
617
618fn serialize_image_inline(
619 ctx: &SerializeCtx<'_>,
620 asset_ref: &AssetRef,
621 alt: &str,
622 title: Option<&str>,
623 attrs: &ImageAttrs,
624) -> String {
625 let mut extra = vec![("alt", alt.to_string())];
626
627 if let Some(src) = resolve_asset_src(asset_ref, ctx.assets) {
628 extra.push(("src", src));
629 } else {
630 extra.push(("src", "".to_string()));
631 extra.push(("data-missing-asset", asset_ref.0.0.clone()));
632 }
633
634 if let Some(t) = title {
635 extra.push(("title", t.to_string()));
636 }
637 if let Some(width) = attrs.width {
638 extra.push(("width", width.to_string()));
639 }
640 if let Some(height) = attrs.height {
641 extra.push(("height", height.to_string()));
642 }
643 if let Some(align) = attrs.align {
644 extra.push(("data-align", text_align_css_value(align).to_string()));
645 }
646
647 let attr_str = attrs_to_html(
648 &[],
649 None,
650 &attrs.passthrough,
651 &extra,
652 &["src", "alt", "title", "width", "height", "data-align"],
653 );
654 format!("<img{}>", attr_str)
655}
656
657fn resolve_asset_src(asset_ref: &AssetRef, assets: &BTreeMap<AssetId, Asset>) -> Option<String> {
658 let asset = assets.get(&asset_ref.0)?;
659 let (source, variants) = match asset {
660 Asset::Image(a) => (&a.source, &a.variants),
661 Asset::Video(a) | Asset::Audio(a) => (&a.source, &a.variants),
662 Asset::File(a) => (&a.source, &a.variants),
663 Asset::Custom(a) => (&a.source, &a.variants),
664 };
665
666 if let Some(url) = variants
667 .iter()
668 .find(|v| v.name == "original")
669 .map(|v| v.publish_url.0.clone())
670 .or_else(|| variants.first().map(|v| v.publish_url.0.clone()))
671 {
672 return Some(url);
673 }
674
675 match source {
676 AssetSource::RemoteUrl { url } => Some(url.0.clone()),
677 AssetSource::DataUri { uri } => Some(uri.clone()),
678 AssetSource::LocalPath { path } => Some(path.as_str().to_string()),
679 }
680}