use std::fmt::Write;
use std::sync::{Arc, OnceLock};
use ecow::{eco_format, EcoString};
use reflexo::error::prelude::*;
use reflexo::typst::TypstHtmlDocument;
use tinymist_world::{CompilerFeat, ExportComputation, WorldComputeGraph};
use typst::diag::{bail, At, SourceResult, StrResult};
use typst::foundations::Repr;
use typst::introspection::Introspector;
use typst::syntax::Span;
use typst_html::{charsets, tag, HtmlAttr, HtmlElement, HtmlFrame, HtmlNode, HtmlTag};
/// Alias of [`tinymist_task::ExportHtmlTask`] under the "static HTML" name.
pub type ExportStaticHtmlTask = tinymist_task::ExportHtmlTask;
/// Alias of [`tinymist_task::HtmlExport`] under the "static HTML" name.
pub type StaticHtmlExport = tinymist_task::HtmlExport;
/// Alias of [`tinymist_task::ExportHtmlTask`]; names the same type as
/// [`ExportStaticHtmlTask`].
pub type ExportHtmlTask = tinymist_task::ExportHtmlTask;
/// Marker type whose [`ExportComputation`] implementation produces an
/// [`HtmlOutput`] from a compiled [`TypstHtmlDocument`].
pub struct HtmlOutputExport;
impl<F: CompilerFeat> ExportComputation<F, TypstHtmlDocument> for HtmlOutputExport {
    type Output = HtmlOutput;
    type Config = ExportHtmlTask;

    /// Wraps `doc` into a lazily rendered [`HtmlOutput`].
    ///
    /// Neither the compute graph nor the task configuration is consulted;
    /// the work is delegated to [`static_html`].
    ///
    /// # Errors
    ///
    /// Any error reported by [`static_html`] is converted into the crate's
    /// error type and returned.
    fn run(
        _graph: &Arc<WorldComputeGraph<F>>,
        doc: &Arc<TypstHtmlDocument>,
        _config: &Self::Config,
    ) -> Result<Self::Output> {
        let output = static_html(doc)?;
        Ok(output)
    }
}
/// A handle on an HTML document whose serialized forms are rendered on first
/// use and then cached.
pub struct HtmlOutput {
/// Whether the writer starts out in pretty-printing mode.
pretty: bool,
/// The document being serialized.
document: Arc<TypstHtmlDocument>,
/// Position of the `<head>` element among the root's children, if one was found.
head_idx: Option<usize>,
/// Position of the `<body>` element among the root's children, if one was found.
body_idx: Option<usize>,
/// Cached result of [`HtmlOutput::body`].
body: OnceLock<SourceResult<String>>,
/// Cached result of [`HtmlOutput::html`].
html: OnceLock<SourceResult<String>>,
}
impl HtmlOutput {
    /// Returns the root's child at `idx` when the index is present, in range,
    /// and refers to an element node.
    fn root_child(&self, idx: Option<usize>) -> Option<&HtmlElement> {
        let node = self.document.root.children.get(idx?)?;
        if let HtmlNode::Element(element) = node {
            Some(element)
        } else {
            None
        }
    }

    /// Returns the `<head>` element located when this output was created.
    fn head(&self) -> Option<&HtmlElement> {
        self.root_child(self.head_idx)
    }

    /// Returns the text of the document title.
    ///
    /// Looks at the first `<title>` element in the head that has at least one
    /// child and yields that first child if it is a text node.
    pub fn title(&self) -> Option<&EcoString> {
        let first_child = self.head()?.children.iter().find_map(|node| match node {
            HtmlNode::Element(e) if e.tag == tag::title => e.children.first(),
            _ => None,
        })?;

        match first_child {
            HtmlNode::Text(text, _) => Some(text),
            _ => None,
        }
    }

    /// Returns the `content` of the first `<meta>` element in the head that
    /// carries both `name="description"` and a `content` attribute.
    pub fn description(&self) -> Option<&EcoString> {
        let head = self.head()?;

        for node in &head.children {
            let HtmlNode::Element(elem) = node else {
                continue;
            };
            if elem.tag != tag::meta {
                continue;
            }

            let mut is_description = false;
            let mut content = None;
            for (attr, value) in &elem.attrs.0 {
                match attr.resolve().as_str() {
                    "name" => is_description |= value == "description",
                    "content" => content = Some(value),
                    _ => {}
                }
            }

            // Only a `<meta>` with both attributes ends the search.
            if is_description && content.is_some() {
                return content;
            }
        }

        None
    }

    /// Returns the serialized body, with the `<body>` element written under
    /// the tag name `div`.
    ///
    /// The string is rendered on the first call and cached afterwards.
    ///
    /// # Errors
    ///
    /// Returns a clone of the diagnostics produced while serializing.
    pub fn body(&self) -> SourceResult<&str> {
        let rendered = self.body.get_or_init(|| {
            let mut w = Writer::new(&self.document.introspector, self.pretty);
            write_indent(&mut w);
            if let Some(body) = self.root_child(self.body_idx) {
                write_element_with_tag(&mut w, body, "div")?;
            }
            if w.pretty {
                w.buf.push('\n');
            }
            Ok(w.buf)
        });

        match rendered {
            Ok(markup) => Ok(markup.as_str()),
            Err(diagnostics) => Err(diagnostics.clone()),
        }
    }

    /// Returns the serialized document: a doctype line followed by the root
    /// element.
    ///
    /// The string is rendered on the first call and cached afterwards.
    ///
    /// # Errors
    ///
    /// Returns a clone of the diagnostics produced while serializing.
    pub fn html(&self) -> SourceResult<&str> {
        let rendered = self.html.get_or_init(|| {
            let mut w = Writer::new(&self.document.introspector, self.pretty);
            w.buf.push_str("<!DOCTYPE html>\n");
            write_indent(&mut w);
            write_element(&mut w, &self.document.root)?;
            if w.pretty {
                w.buf.push('\n');
            }
            Ok(w.buf)
        });

        match rendered {
            Ok(markup) => Ok(markup.as_str()),
            Err(diagnostics) => Err(diagnostics.clone()),
        }
    }

    /// Converts the document with `reflexo_typst2hast::hast`.
    #[cfg(feature = "hast")]
    pub fn hast(&self) -> SourceResult<reflexo_typst2hast::hast::HastElementContent> {
        reflexo_typst2hast::hast(&self.document)
    }
}
/// The `type` attribute, inspected on `<script>` elements when choosing a [`RawMode`].
const TYPE: HtmlAttr = HtmlAttr::constant("type");
/// Returns the index of the first direct child of `element` that is an
/// element with the given `tag`.
fn find_tag_child(element: &HtmlElement, tag: HtmlTag) -> Option<usize> {
    element
        .children
        .iter()
        .position(|node| matches!(node, HtmlNode::Element(child) if child.tag == tag))
}
/// Creates an [`HtmlOutput`] for `document`.
///
/// The positions of the root's `<head>` and `<body>` children are looked up
/// here; the serialized strings are rendered later, on demand. Pretty
/// printing is always enabled.
pub fn static_html(document: &Arc<TypstHtmlDocument>) -> SourceResult<HtmlOutput> {
    let root = &document.root;
    let output = HtmlOutput {
        pretty: true,
        document: Arc::clone(document),
        head_idx: find_tag_child(root, tag::head),
        body_idx: find_tag_child(root, tag::body),
        body: OnceLock::new(),
        html: OnceLock::new(),
    };
    Ok(output)
}
/// Mutable state threaded through the serialization functions.
struct Writer<'a> {
/// The output accumulated so far.
buf: String,
/// Current nesting depth; `write_indent` emits one indent unit per level.
level: usize,
/// Introspector handed to the SVG renderer when a frame is written.
introspector: &'a Introspector,
/// Whether line breaks and indentation are currently being emitted.
pretty: bool,
}
impl<'a> Writer<'a> {
    /// Creates a writer with an empty buffer at nesting level zero.
    fn new(introspector: &'a Introspector, pretty: bool) -> Self {
        let buf = String::new();
        Self {
            introspector,
            pretty,
            level: 0,
            buf,
        }
    }
}
/// Starts a new line and indents it to the current level.
///
/// Does nothing unless the writer is in pretty mode.
fn write_indent(w: &mut Writer) {
    // One indent unit, emitted once per nesting level.
    const INDENT: &str = " ";

    if !w.pretty {
        return;
    }
    w.buf.push('\n');
    w.buf.extend(std::iter::repeat(INDENT).take(w.level));
}
/// Serializes a single node into the writer.
///
/// Introspection tags produce no output. `escape_text` is forwarded to
/// [`write_text`] for text nodes and ignored for every other node kind.
fn write_node(w: &mut Writer, node: &HtmlNode, escape_text: bool) -> SourceResult<()> {
    match node {
        HtmlNode::Text(text, span) => write_text(w, text, *span, escape_text),
        HtmlNode::Element(element) => write_element(w, element),
        HtmlNode::Frame(frame) => {
            write_frame(w, frame);
            Ok(())
        }
        HtmlNode::Tag(_) => Ok(()),
    }
}
/// Serializes plain text into the writer.
///
/// When `escape` is set every character is written as an escape; otherwise
/// only characters that are not valid in normal element text are escaped.
///
/// # Errors
///
/// Fails at `span` if a character that needs escaping cannot be encoded.
fn write_text(w: &mut Writer, text: &str, span: Span, escape: bool) -> SourceResult<()> {
    for ch in text.chars() {
        let verbatim = !escape && charsets::is_valid_in_normal_element_text(ch);
        if verbatim {
            w.buf.push(ch);
            continue;
        }
        write_escape(w, ch).at(span)?;
    }
    Ok(())
}
/// Serializes `element` under its own tag name.
fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
    let tag_name = element.tag.resolve();
    write_element_with_tag(w, element, &tag_name)
}
/// Serializes `element`, writing `tag` as the name in its opening and closing
/// tags.
///
/// All structural decisions (void, raw, escapable raw, `pre`/`textarea`
/// handling) are based on `element.tag`, not on the `tag` string.
///
/// # Errors
///
/// Fails if an attribute value contains an unencodable character, if a void
/// element has children, or if serializing the content fails.
fn write_element_with_tag(w: &mut Writer, element: &HtmlElement, tag: &str) -> SourceResult<()> {
    // Opening tag and attributes.
    w.buf.extend(["<", tag]);
    for (name, value) in &element.attrs.0 {
        w.buf.push(' ');
        w.buf.push_str(&name.resolve());

        // An empty value is written as a bare attribute name.
        if value.is_empty() {
            continue;
        }

        w.buf.push_str("=\"");
        for ch in value.chars() {
            if !charsets::is_valid_in_attribute_value(ch) {
                write_escape(w, ch).at(element.span)?;
            } else {
                w.buf.push(ch);
            }
        }
        w.buf.push('"');
    }
    w.buf.push('>');

    let childless = element.children.is_empty();

    // Void elements have neither content nor a closing tag.
    if tag::is_void(element.tag) {
        if !childless {
            bail!(element.span, "HTML void elements must not have children");
        }
        return Ok(());
    }

    // Emit an extra newline right after the opening tag when the content of a
    // `pre`/`textarea` itself begins with one.
    if [tag::pre, tag::textarea].contains(&element.tag) && starts_with_newline(element) {
        w.buf.push('\n');
    }

    // Content.
    if tag::is_raw(element.tag) {
        write_raw(w, element)?;
    } else if tag::is_escapable_raw(element.tag) {
        write_escapable_raw(w, element)?;
    } else if !childless {
        write_children(w, element)?;
    }

    // Closing tag.
    w.buf.extend(["</", tag, ">"]);
    Ok(())
}
/// Serializes the children of `element`, inserting line breaks and
/// indentation where pretty printing applies.
///
/// Returns an error as soon as serializing a child fails; in that case the
/// writer's `pretty` flag and `level` are not restored.
fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
// Remember the caller's setting; it is put back before returning `Ok`.
let pretty = w.pretty;
// Pretty printing inside this element requires that the element allows it
// and that at least one child is a frame or an element wanting its own line.
let pretty_inside = allows_pretty_inside(element.tag)
&& element.children.iter().any(|node| match node {
HtmlNode::Element(child) => wants_pretty_around(child.tag),
HtmlNode::Frame(_) => true,
_ => false,
});
// Pretty printing can only be switched off here, never switched on.
w.pretty &= pretty_inside;
// `indent` records that the next child written must start on a fresh line.
// It is initially set so the first child is separated from the opening tag.
let mut indent = w.pretty;
w.level += 1;
for c in &element.children {
let pretty_around = match c {
// Introspection tags produce no output and do not consume `indent`.
HtmlNode::Tag(_) => continue,
HtmlNode::Element(child) => w.pretty && wants_pretty_around(child.tag),
HtmlNode::Text(..) | HtmlNode::Frame(_) => false,
};
// `take` reads the pending flag and resets it to `false` in one step.
if core::mem::take(&mut indent) || pretty_around {
write_indent(w);
}
// The element's `pre_span` flag decides whether text children are fully escaped.
write_node(w, c, element.pre_span)?;
// A child that wanted its own line also forces a break before its successor.
indent = pretty_around;
}
w.level -= 1;
// Puts the closing tag on its own line; a no-op when `w.pretty` is off.
write_indent(w);
w.pretty = pretty;
Ok(())
}
/// Reports whether the first child of `element` that is not an introspection
/// tag is a text node beginning with `\n` or `\r`.
///
/// Returns `false` when there is no such child or when it is not text.
fn starts_with_newline(element: &HtmlElement) -> bool {
    element
        .children
        .iter()
        .find(|node| !matches!(node, HtmlNode::Tag(_)))
        .is_some_and(|node| {
            matches!(node, HtmlNode::Text(text, _) if text.starts_with(['\n', '\r']))
        })
}
/// Serializes the content of a raw text element.
///
/// The text is written unescaped. In pretty mode the layout is chosen by
/// [`RawMode::of`]; otherwise the text is written as is.
///
/// # Errors
///
/// Fails if the text cannot be collected (see [`collect_raw_text`]) or if it
/// contains the element's own closing tag.
fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
    let text = collect_raw_text(element)?;

    if let Some(closing) = find_closing_tag(&text, element.tag) {
        bail!(
            element.span,
            "HTML raw text element cannot contain its own closing tag";
            hint: "the sequence `{closing}` appears in the raw text",
        )
    }

    let mode = if !w.pretty {
        RawMode::Keep
    } else {
        RawMode::of(element, &text)
    };

    match mode {
        // Verbatim, directly between the tags.
        RawMode::Keep => w.buf.push_str(&text),
        // Verbatim, but on lines of its own.
        RawMode::Wrap => {
            w.buf.push('\n');
            w.buf.push_str(&text);
            write_indent(w);
        }
        // Each line re-indented one level below the element.
        RawMode::Indent => {
            w.level += 1;
            for line in text.lines() {
                write_indent(w);
                w.buf.push_str(line);
            }
            w.level -= 1;
            write_indent(w);
        }
    }

    Ok(())
}
fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
walk_raw_text(element, |piece, span| write_text(w, piece, span, false))
}
/// Concatenates the text children of a raw text element.
///
/// # Errors
///
/// Fails at the offending piece's span if it contains a character rejected by
/// `charsets::is_w3c_text_char`, or if the element has non-text children.
fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
    let mut collected = String::new();
    walk_raw_text(element, |piece, span| {
        match piece.chars().find(|&c| !charsets::is_w3c_text_char(c)) {
            Some(bad) => Err(unencodable(bad)).at(span),
            None => {
                collected.push_str(piece);
                Ok(())
            }
        }
    })?;
    Ok(collected)
}
/// Calls `f` with each text child of `element` and its span, in order.
///
/// Introspection tags are skipped.
///
/// # Errors
///
/// Fails on the first element or frame child, and propagates any error
/// returned by `f`.
fn walk_raw_text(
    element: &HtmlElement,
    mut f: impl FnMut(&str, Span) -> SourceResult<()>,
) -> SourceResult<()> {
    for child in &element.children {
        let offending = match child {
            HtmlNode::Tag(_) => continue,
            HtmlNode::Text(text, span) => {
                f(text, *span)?;
                continue;
            }
            HtmlNode::Element(inner) => inner.span,
            HtmlNode::Frame(frame) => frame.span,
        };
        bail!(offending, "HTML raw text element cannot have non-text children")
    }
    Ok(())
}
/// Finds the first occurrence in `text` of something that would close a raw
/// text element with the given `tag`.
///
/// A match is `</`, followed by the tag name (compared ASCII
/// case-insensitively), followed by a tab, line feed, form feed, carriage
/// return, space, `>` or `/`. The returned slice covers `</` plus the tag
/// name as it appears in `text`.
///
/// Returns `None` when no such sequence exists.
fn find_closing_tag(text: &str, tag: HtmlTag) -> Option<&str> {
    let name = tag.resolve();
    let len = name.len();
    text.match_indices("</").find_map(|(i, _)| {
        let rest = &text[i + 2..];
        // Checked slicing: `len` is a byte count, and in arbitrary text that
        // offset may be past the end or inside a multi-byte character.
        // Indexing with `rest[..len]` would panic there; such a position can
        // never be a match, so it is skipped instead.
        let candidate = rest.get(..len)?;
        let after = rest.get(len..)?;
        let disallowed = candidate.eq_ignore_ascii_case(&name)
            && after.starts_with(['\t', '\n', '\u{c}', '\r', ' ', '>', '/']);
        disallowed.then(|| &text[i..i + 2 + len])
    })
}
/// How `write_raw` lays out the text of a raw text element.
enum RawMode {
/// Write the text as is, directly between the tags.
Keep,
/// Write the text unchanged, but preceded by a newline and followed by an
/// indented line break.
Wrap,
/// Write each line of the text on its own line, indented one level deeper
/// than the element.
Indent,
}
impl RawMode {
    /// Chooses the layout for the raw `text` of `element`.
    ///
    /// * `<style>` is always indented.
    /// * `<script>` is kept untouched when it has a `type` attribute with a
    ///   value other than `text/javascript`; otherwise it is wrapped when the
    ///   text contains a backtick and indented when it does not.
    /// * Every other element is kept untouched.
    fn of(element: &HtmlElement, text: &str) -> Self {
        if element.tag == tag::style {
            return Self::Indent;
        }
        if element.tag != tag::script {
            return Self::Keep;
        }

        let has_other_type = element
            .attrs
            .0
            .iter()
            .any(|(attr, value)| *attr == TYPE && value != "text/javascript");

        if has_other_type {
            Self::Keep
        } else if text.contains('`') {
            Self::Wrap
        } else {
            Self::Indent
        }
    }
}
/// Whether line breaks and indentation may be inserted between the children
/// of an element with this tag.
///
/// True for block-level tags other than `pre`, for tabular tags, and for `li`.
fn allows_pretty_inside(tag: HtmlTag) -> bool {
    if tag::is_block_by_default(tag) && tag != tag::pre {
        return true;
    }
    tag::is_tabular_by_default(tag) || tag == tag::li
}
/// Whether an element with this tag should be placed on a line of its own.
///
/// True for every tag accepted by [`allows_pretty_inside`], for metadata
/// tags, and for `pre`.
fn wants_pretty_around(tag: HtmlTag) -> bool {
    if allows_pretty_inside(tag) {
        return true;
    }
    tag::is_metadata(tag) || tag == tag::pre
}
fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
match c {
'&' => w.buf.push_str("&"),
'<' => w.buf.push_str("<"),
'>' => w.buf.push_str(">"),
'"' => w.buf.push_str("""),
'\'' => w.buf.push_str("'"),
c if charsets::is_w3c_text_char(c) && c != '\r' => {
write!(w.buf, "&#x{:x};", c as u32).unwrap()
}
_ => return Err(unencodable(c)),
}
Ok(())
}
#[cold]
fn unencodable(c: char) -> EcoString {
eco_format!("the character `{}` cannot be encoded in HTML", c.repr())
}
/// Renders `frame` to SVG with `typst_svg::svg_html_frame` and appends the
/// result to the buffer.
fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
    let id = frame.id.as_deref();
    let markup = typst_svg::svg_html_frame(
        &frame.inner,
        frame.text_size,
        id,
        &frame.link_points,
        w.introspector,
    );
    w.buf += &markup;
}