use super::{
attributes::Attributes,
document::Document,
node::{ElementData, NodeData, NodeId},
DocumentStyleMap, InliningMode,
};
use crate::{html::ElementStyleMap, parser, InlineError};
use html5ever::{local_name, ns, tendril::StrTendril, LocalName, QualName};
use memchr::{memchr3_iter, memchr_iter};
use smallvec::{smallvec, SmallVec};
use std::io::Write;
#[inline]
fn supports_dimension_attrs(name: &LocalName) -> bool {
matches!(
*name,
local_name!("table") | local_name!("td") | local_name!("th") | local_name!("img")
)
}
/// Reports whether `name` is `table`, `td` or `th`.
#[inline]
fn is_table_element(name: &LocalName) -> bool {
    [local_name!("table"), local_name!("td"), local_name!("th")].contains(name)
}
/// A CSS dimension reduced to the form used for an HTML `width` / `height` attribute.
#[derive(Clone, Copy)]
enum DimensionValue<'a> {
    /// A bare number; the borrowed slice holds the numeric text only.
    Numeric(&'a str),
    /// A percentage; the borrowed slice holds the number without the `%` sign.
    Percent(&'a str),
    /// The `auto` keyword.
    Auto,
}

impl DimensionValue<'_> {
    /// Writes the attribute text for this value: the number, the number followed by `%`,
    /// or the literal `auto`.
    ///
    /// # Errors
    ///
    /// Propagates any failure reported by `writer`.
    #[inline]
    fn write_to<W: Write>(&self, writer: &mut W) -> Result<(), InlineError> {
        let digits = match *self {
            Self::Auto => {
                writer.write_all(b"auto")?;
                return Ok(());
            }
            Self::Numeric(digits) | Self::Percent(digits) => digits,
        };
        writer.write_all(digits.as_bytes())?;
        if matches!(self, Self::Percent(_)) {
            writer.write_all(b"%")?;
        }
        Ok(())
    }
}
/// Converts a CSS `width` / `height` value into the form used for an HTML attribute.
///
/// Accepted inputs (surrounding whitespace is ignored):
/// - `auto`, compared ASCII case-insensitively;
/// - an optionally signed decimal number with at most one `.`, followed by nothing or `px`;
/// - the same number followed by `%`, but only when `allow_percent` is `true`.
///
/// A trailing `!important` after the unit is ignored. The returned number borrows from
/// `value` and keeps its sign and decimal point but not its unit.
///
/// Returns `None` for every other unit and for input that contains no digit at all
/// (for example `""`, `"-"`, `"."` or `".px"`).
#[inline]
#[allow(clippy::arithmetic_side_effects)]
fn extract_dimension_value(value: &str, allow_percent: bool) -> Option<DimensionValue<'_>> {
    let value = value.trim();
    if value.eq_ignore_ascii_case("auto") {
        return Some(DimensionValue::Auto);
    }

    let bytes = value.as_bytes();
    // Skip a single leading sign, if present.
    let mut end = usize::from(matches!(bytes.first(), Some(b'-' | b'+')));
    let mut has_dot = false;
    let mut has_digit = false;
    while let Some(&byte) = bytes.get(end) {
        match byte {
            b'0'..=b'9' => has_digit = true,
            b'.' if !has_dot => has_dot = true,
            _ => break,
        }
        end += 1;
    }
    // A sign and/or a dot without any digit is not a number.
    if !has_digit {
        return None;
    }

    // `end` only ever advanced over ASCII bytes, so it is a valid char boundary.
    let numeric_part = &value[..end];
    let unit_part = value[end..].trim();
    let unit_part = unit_part
        .strip_suffix("!important")
        .map_or(unit_part, str::trim);

    match unit_part {
        "" | "px" => Some(DimensionValue::Numeric(numeric_part)),
        "%" if allow_percent => Some(DimensionValue::Percent(numeric_part)),
        _ => None,
    }
}
/// Returns the value of the last entry in `styles` whose name equals `property`,
/// or `None` when no entry matches.
#[inline]
fn find_style_value<'a>(styles: &'a ElementStyleMap<'_>, property: &str) -> Option<&'a str> {
    // Walk from the back so that the last matching declaration is the one returned.
    for (name, _, value) in styles.iter().rev() {
        if *name == property {
            return Some(*value);
        }
    }
    None
}
#[allow(clippy::too_many_arguments, clippy::fn_params_excessive_bools)]
pub(crate) fn serialize_to<W: Write>(
document: &Document,
writer: &mut W,
styles: DocumentStyleMap<'_>,
keep_style_tags: bool,
keep_link_tags: bool,
minify_css: bool,
at_rules: Option<&String>,
mode: InliningMode,
apply_width_attributes: bool,
apply_height_attributes: bool,
) -> Result<(), InlineError> {
let sink = Sink::new(
document,
NodeId::document_id(),
keep_style_tags,
keep_link_tags,
minify_css,
at_rules,
mode,
apply_width_attributes,
apply_height_attributes,
);
let mut ser = HtmlSerializer::new(writer, styles);
sink.serialize(&mut ser)
}
/// A node of `document` paired with the options that control how that node and its
/// descendants are serialized.
#[allow(clippy::struct_excessive_bools)]
struct Sink<'a> {
/// Document that owns the node being serialized.
document: &'a Document,
/// Node this sink serializes.
node: NodeId,
/// When `false`, `<style>` elements are skipped unless `get_css_inline()` on their
/// attributes returns `Some("keep")`.
keep_style_tags: bool,
/// When `false`, `<link rel="stylesheet">` elements are skipped.
keep_link_tags: bool,
/// Forwarded to the serializer to select the compact declaration separator.
minify_css: bool,
/// Text written inside an extra `<style>` element right after the `<head>` start tag,
/// if present and non-empty.
at_rules: Option<&'a String>,
/// In `InliningMode::Fragment` the `<html>` element is skipped.
inlining_mode: InliningMode,
/// Forwarded to the serializer to enable derived `width` attributes.
apply_width_attributes: bool,
/// Forwarded to the serializer to enable derived `height` attributes.
apply_height_attributes: bool,
}
impl<'a> Sink<'a> {
    /// Bundles a document node with the serialization options.
    #[allow(clippy::too_many_arguments, clippy::fn_params_excessive_bools)]
    fn new(
        document: &'a Document,
        node: NodeId,
        keep_style_tags: bool,
        keep_link_tags: bool,
        minify_css: bool,
        at_rules: Option<&'a String>,
        inlining_mode: InliningMode,
        apply_width_attributes: bool,
        apply_height_attributes: bool,
    ) -> Sink<'a> {
        Self {
            document,
            node,
            keep_style_tags,
            keep_link_tags,
            minify_css,
            at_rules,
            inlining_mode,
            apply_width_attributes,
            apply_height_attributes,
        }
    }

    /// Creates a sink for `node` that shares this sink's document and options.
    #[inline]
    fn for_node(&self, node: NodeId) -> Sink<'a> {
        Sink {
            node,
            document: self.document,
            keep_style_tags: self.keep_style_tags,
            keep_link_tags: self.keep_link_tags,
            minify_css: self.minify_css,
            at_rules: self.at_rules,
            inlining_mode: self.inlining_mode,
            apply_width_attributes: self.apply_width_attributes,
            apply_height_attributes: self.apply_height_attributes,
        }
    }

    /// Data of the node this sink points at.
    #[inline]
    fn data(&self) -> &NodeData {
        let current = &self.document[self.node];
        &current.data
    }

    /// Decides whether `element` (together with its subtree) is left out of the output.
    ///
    /// - `<style>` is skipped unless style tags are kept or its css-inline attribute is `keep`;
    /// - `<link rel="stylesheet">` is skipped unless link tags are kept;
    /// - `<html>` is skipped in fragment mode;
    /// - everything else is written.
    #[inline]
    fn should_skip_element(&self, element: &ElementData) -> bool {
        match element.name.local {
            local_name!("style") => {
                !(self.keep_style_tags || element.attributes.get_css_inline() == Some("keep"))
            }
            local_name!("link")
                if element.attributes.get(local_name!("rel")) == Some("stylesheet") =>
            {
                !self.keep_link_tags
            }
            local_name!("html") => matches!(self.inlining_mode, InliningMode::Fragment),
            _ => false,
        }
    }

    /// Serializes every child of the current node in order, stopping at the first error.
    fn serialize_children<W: Write>(
        &self,
        serializer: &mut HtmlSerializer<'_, W>,
    ) -> Result<(), InlineError> {
        self.document
            .children(self.node)
            .into_iter()
            .try_for_each(|child| self.for_node(child).serialize(serializer))
    }

    /// Serializes the current node and, where applicable, its descendants.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the serializer.
    fn serialize<W: Write>(
        &self,
        serializer: &mut HtmlSerializer<'_, W>,
    ) -> Result<(), InlineError> {
        // Everything except elements maps onto a single serializer call.
        let (element, inlining_ignored) = match self.data() {
            NodeData::Document => return self.serialize_children(serializer),
            NodeData::Doctype { name } => return serializer.write_doctype(name),
            NodeData::Text { text } => return serializer.write_text(text),
            NodeData::Comment { text } => return serializer.write_comment(text),
            NodeData::ProcessingInstruction { target, data } => {
                return serializer.write_processing_instruction(target, data)
            }
            NodeData::Element {
                element,
                inlining_ignored,
            } => (element, *inlining_ignored),
        };

        if self.should_skip_element(element) {
            return Ok(());
        }

        // Without a node id the serializer does not look up styles for this element.
        let style_node_id = if inlining_ignored {
            None
        } else {
            Some(self.node)
        };
        serializer.start_elem(
            &element.name,
            &element.attributes,
            style_node_id,
            self.minify_css,
            self.apply_width_attributes,
            self.apply_height_attributes,
        )?;

        // Preserved at-rules go into a `<style>` placed first inside `<head>`.
        if element.name.local == local_name!("head") {
            if let Some(at_rules) = self.at_rules.filter(|rules| !rules.is_empty()) {
                serializer.write_at_rules_style(at_rules)?;
            }
        }

        self.serialize_children(serializer)?;
        serializer.end_elem(&element.name)
    }
}
/// Per-element bookkeeping kept on the serializer's stack while an element is open.
struct ElemInfo {
/// Local name of the element when it is in the HTML namespace, `None` otherwise
/// (and for the root entry pushed by `HtmlSerializer::new`).
html_name: Option<LocalName>,
/// When `true`, nested start tags and this element's end tag are not written.
ignore_children: bool,
}
/// Writes HTML to `writer`, inlining per-node styles into start tags.
struct HtmlSerializer<'a, Wr: Write> {
/// Destination of the serialized output.
writer: Wr,
/// Styles looked up by node id in `start_elem`; an entry is taken out when it is used.
styles: DocumentStyleMap<'a>,
/// Open elements; `start_elem` pushes an entry and `end_elem` pops it.
stack: Vec<ElemInfo>,
/// Scratch buffers passed to `merge_styles` for every element with a `style` attribute.
style_buffer: SmallVec<[Vec<u8>; 8]>,
}
impl<'a, W: Write> HtmlSerializer<'a, W> {
fn new(writer: W, styles: DocumentStyleMap<'a>) -> Self {
let mut stack = Vec::with_capacity(8);
stack.push(ElemInfo {
html_name: None,
ignore_children: false,
});
HtmlSerializer {
writer,
styles,
stack,
style_buffer: smallvec![],
}
}
/// Returns the entry on top of the element stack.
///
/// # Panics
///
/// Panics if the stack is empty.
fn parent(&mut self) -> &mut ElemInfo {
    match self.stack.last_mut() {
        Some(info) => info,
        None => panic!("no parent ElemInfo"),
    }
}
/// Writes `text` as HTML character data, replacing `&`, `<`, `>` and U+00A0 with
/// `&amp;`, `&lt;`, `&gt;` and `&nbsp;` respectively.
///
/// # Errors
///
/// Propagates any failure reported by the underlying writer.
fn write_escaped(&mut self, text: &str) -> Result<(), InlineError> {
    let mut last_end = 0;
    for (start, part) in text.match_indices(['&', '\u{00A0}', '<', '>']) {
        // Copy the untouched run that precedes the special character.
        self.writer.write_all(
            text.get(last_end..start)
                .expect("Invalid substring")
                .as_bytes(),
        )?;
        // The four searched characters have distinct first bytes, so one byte is enough
        // to tell them apart.
        let entity: &[u8] = match part.as_bytes()[0] {
            b'&' => b"&amp;",
            b'<' => b"&lt;",
            b'>' => b"&gt;",
            // U+00A0 is encoded as 0xC2 0xA0 in UTF-8.
            0xC2 => b"&nbsp;",
            _ => unreachable!(),
        };
        self.writer.write_all(entity)?;
        last_end = start.checked_add(part.len()).expect("Size overflow");
    }
    self.writer.write_all(
        text.get(last_end..text.len())
            .expect("Invalid substring")
            .as_bytes(),
    )?;
    Ok(())
}
#[allow(clippy::arithmetic_side_effects)]
fn write_attributes(&mut self, text: &str) -> Result<(), InlineError> {
let bytes = text.as_bytes();
let mut last_end = 0;
for idx in memchr3_iter(b'&', b'"', 0xC2, bytes) {
match bytes[idx] {
b'&' => {
self.writer.write_all(&bytes[last_end..idx])?;
self.writer.write_all(b"&")?;
last_end = idx + 1;
}
b'"' => {
self.writer.write_all(&bytes[last_end..idx])?;
self.writer.write_all(b""")?;
last_end = idx + 1;
}
0xC2 if bytes.get(idx + 1) == Some(&0xA0) => {
self.writer.write_all(&bytes[last_end..idx])?;
self.writer.write_all(b" ")?;
last_end = idx + 2; }
_ => {} }
}
self.writer.write_all(&bytes[last_end..])?;
Ok(())
}
/// Writes the start tag of an element and pushes its entry onto the element stack.
///
/// Attribute order in the output: `class` (if any), then `width` / `height` derived from
/// the inlined styles (if enabled, supported by the element and not already present),
/// then the element's own attributes, then a new `style` attribute when the element had
/// none but styles exist for it. An existing `style` attribute is merged with the
/// inlined styles via `merge_styles`.
///
/// - `style_node_id`: node whose entry is taken from the style map; `None` disables
///   inlining for this element.
/// - `minify_css`: selects the compact declaration format.
///
/// Nothing is written when the parent entry has `ignore_children` set; only the stack
/// entry is pushed so that `end_elem` stays balanced.
///
/// # Errors
///
/// Propagates writer failures and errors returned by `merge_styles`.
#[allow(clippy::too_many_lines)]
fn start_elem(
    &mut self,
    name: &QualName,
    attrs: &Attributes,
    style_node_id: Option<NodeId>,
    minify_css: bool,
    apply_width_attributes: bool,
    apply_height_attributes: bool,
) -> Result<(), InlineError> {
    let html_name = match name.ns {
        ns!(html) => Some(name.local.clone()),
        _ => None,
    };

    if self.parent().ignore_children {
        self.stack.push(ElemInfo {
            html_name,
            ignore_children: true,
        });
        return Ok(());
    }

    // Take this node's styles out of the map and order them by their second tuple field.
    let mut styles = if let Some(node_id) = style_node_id {
        self.styles.get_mut(node_id.get()).and_then(|slot| {
            slot.take().map(|mut styles| {
                styles.sort_unstable_by(|a, b| a.1.cmp(&b.1));
                styles
            })
        })
    } else {
        None
    };

    self.writer.write_all(b"<")?;
    self.writer.write_all(name.local.as_bytes())?;

    if let Some(class) = &attrs.class {
        self.writer.write_all(b" class=\"")?;
        // Escape like any other attribute so that `"` or `&` cannot break the tag.
        self.write_attributes(&class.value)?;
        self.writer.write_all(b"\"")?;
    }

    if let Some(ref html_name) = html_name {
        if supports_dimension_attrs(html_name) {
            let allow_percent = is_table_element(html_name);
            let requested = [
                (apply_width_attributes, "width", local_name!("width")),
                (apply_height_attributes, "height", local_name!("height")),
            ];
            for (enabled, property, attribute) in requested {
                // An attribute already present on the element is never overridden.
                if !enabled || attrs.contains(attribute) {
                    continue;
                }
                if let Some(dim) = styles
                    .as_ref()
                    .and_then(|s| find_style_value(s, property))
                    .and_then(|v| extract_dimension_value(v, allow_percent))
                {
                    self.writer.write_all(b" ")?;
                    self.writer.write_all(property.as_bytes())?;
                    self.writer.write_all(b"=\"")?;
                    dim.write_to(&mut self.writer)?;
                    self.writer.write_all(b"\"")?;
                }
            }
        }
    }

    for attr in &attrs.attributes {
        self.writer.write_all(b" ")?;
        match attr.name.ns {
            ns!() => (),
            ns!(xml) => self.writer.write_all(b"xml:")?,
            ns!(xmlns) => {
                if attr.name.local != local_name!("xmlns") {
                    self.writer.write_all(b"xmlns:")?;
                }
            }
            ns!(xlink) => self.writer.write_all(b"xlink:")?,
            _ => {
                self.writer.write_all(b"unknown_namespace:")?;
            }
        }
        self.writer.write_all(attr.name.local.as_bytes())?;
        self.writer.write_all(b"=\"")?;
        if attr.name.local == local_name!("style") {
            if let Some(new_styles) = &styles {
                merge_styles(
                    &mut self.writer,
                    &attr.value,
                    new_styles,
                    &mut self.style_buffer,
                    minify_css,
                )?;
                // The styles are consumed; do not emit a second `style` attribute below.
                styles = None;
            } else {
                self.write_attributes(&attr.value)?;
            }
        } else {
            self.write_attributes(&attr.value)?;
        }
        self.writer.write_all(b"\"")?;
    }

    // The element had no `style` attribute of its own: emit one from the inlined styles.
    if let Some(styles) = styles {
        self.writer.write_all(b" style=\"")?;
        let mut declarations = styles.iter().peekable();
        while let Some((property, _, value)) = declarations.next() {
            write_declaration(&mut self.writer, property, value, minify_css)?;
            // Minified output omits the `;` after the last declaration.
            if !minify_css || declarations.peek().is_some() {
                self.writer.write_all(b";")?;
            }
        }
        self.writer.write_all(b"\"")?;
    }

    self.writer.write_all(b">")?;

    // These HTML elements get no children and no end tag in the output.
    let ignore_children = name.ns == ns!(html)
        && matches!(
            name.local,
            local_name!("area")
                | local_name!("base")
                | local_name!("basefont")
                | local_name!("bgsound")
                | local_name!("br")
                | local_name!("col")
                | local_name!("embed")
                | local_name!("frame")
                | local_name!("hr")
                | local_name!("img")
                | local_name!("input")
                | local_name!("keygen")
                | local_name!("link")
                | local_name!("meta")
                | local_name!("param")
                | local_name!("source")
                | local_name!("track")
                | local_name!("wbr")
        );

    self.stack.push(ElemInfo {
        html_name,
        ignore_children,
    });

    Ok(())
}
/// Pops the current element from the stack and writes its end tag, unless the popped
/// entry has `ignore_children` set.
///
/// # Panics
///
/// Panics if the element stack is empty.
fn end_elem(&mut self, name: &QualName) -> Result<(), InlineError> {
    let closed = self.stack.pop().expect("no ElemInfo");
    if !closed.ignore_children {
        for chunk in [&b"</"[..], name.local.as_bytes(), &b">"[..]] {
            self.writer.write_all(chunk)?;
        }
    }
    Ok(())
}
/// Writes a text node.
///
/// Text is written verbatim when the parent is one of `style`, `script`, `xmp`, `iframe`,
/// `noembed`, `noframes`, `plaintext` or `noscript`; otherwise it goes through
/// `write_escaped`.
fn write_text(&mut self, text: &str) -> Result<(), InlineError> {
    let is_raw_text = matches!(
        self.parent().html_name,
        Some(
            local_name!("style")
                | local_name!("script")
                | local_name!("xmp")
                | local_name!("iframe")
                | local_name!("noembed")
                | local_name!("noframes")
                | local_name!("plaintext")
                | local_name!("noscript")
        ),
    );
    if is_raw_text {
        self.writer.write_all(text.as_bytes())?;
        Ok(())
    } else {
        self.write_escaped(text)
    }
}
/// Writes `at_rules` verbatim, wrapped in a `<style>` element.
fn write_at_rules_style(&mut self, at_rules: &str) -> Result<(), InlineError> {
    for chunk in [&b"<style>"[..], at_rules.as_bytes(), &b"</style>"[..]] {
        self.writer.write_all(chunk)?;
    }
    Ok(())
}
/// Writes a comment node as `<!--text-->`; `text` is not escaped.
fn write_comment(&mut self, text: &str) -> Result<(), InlineError> {
    for chunk in [&b"<!--"[..], text.as_bytes(), &b"-->"[..]] {
        self.writer.write_all(chunk)?;
    }
    Ok(())
}
/// Writes a doctype declaration as `<!DOCTYPE name>`.
fn write_doctype(&mut self, name: &str) -> Result<(), InlineError> {
    for chunk in [&b"<!DOCTYPE "[..], name.as_bytes(), &b">"[..]] {
        self.writer.write_all(chunk)?;
    }
    Ok(())
}
/// Writes a processing instruction as `<?target data>`.
fn write_processing_instruction(
    &mut self,
    target: &str,
    data: &str,
) -> Result<(), InlineError> {
    let chunks = [
        &b"<?"[..],
        target.as_bytes(),
        &b" "[..],
        data.as_bytes(),
        &b">"[..],
    ];
    for chunk in chunks {
        self.writer.write_all(chunk)?;
    }
    Ok(())
}
}
/// Bytes written between a property name and its value in regular output.
const STYLE_SEPARATOR: &[u8] = b": ";
/// Bytes written between a property name and its value when CSS minification is enabled.
const STYLE_SEPARATOR_MIN: &[u8] = b":";
/// Writes one CSS declaration as `name`, a separator, then the processed `value`.
///
/// The separator is `:` when `minify_css` is set and `: ` otherwise. The value is written
/// through `write_declaration_value`.
#[inline]
fn write_declaration<Wr: Write>(
    writer: &mut Wr,
    name: &str,
    value: &str,
    minify_css: bool,
) -> Result<(), InlineError> {
    let separator = if minify_css {
        STYLE_SEPARATOR_MIN
    } else {
        STYLE_SEPARATOR
    };
    writer.write_all(name.as_bytes())?;
    writer.write_all(separator)?;
    write_declaration_value(writer, value)
}
/// Writes `value` with surrounding whitespace removed and every `"` replaced by `'`.
#[inline]
#[allow(clippy::arithmetic_side_effects)]
fn write_declaration_value<Wr: Write>(writer: &mut Wr, value: &str) -> Result<(), InlineError> {
    let bytes = value.trim().as_bytes();
    // Emit each run that ends at a double quote, followed by a single quote, and carry
    // forward the position just past that quote.
    let tail_start = memchr_iter(b'"', bytes).try_fold(
        0,
        |segment_start, quote_at| -> Result<usize, InlineError> {
            writer.write_all(&bytes[segment_start..quote_at])?;
            writer.write_all(b"'")?;
            Ok(quote_at + 1)
        },
    )?;
    writer.write_all(&bytes[tail_start..])?;
    Ok(())
}
/// Stores the declaration `$property: $val` in slot `$len` of `$buffers` and then
/// increments `$len`.
///
/// An existing slot is cleared and overwritten; otherwise a new buffer is pushed. The
/// expansion uses `?`, so it can only be used in a function whose error type accepts
/// the error returned by `write_declaration`.
macro_rules! push_or_update {
    ($buffers:expr, $len:expr, $property:expr, $val:expr, $minify:expr) => {{
        match $buffers.get_mut($len) {
            Some(slot) => {
                slot.clear();
                write_declaration(slot, &$property, $val, $minify)?;
            }
            None => {
                // Size the new buffer for the name, the long separator and the trimmed value.
                let capacity = $property
                    .len()
                    .saturating_add(STYLE_SEPARATOR.len())
                    .saturating_add($val.trim().len());
                let mut fresh = Vec::with_capacity(capacity);
                write_declaration(&mut fresh, &$property, $val, $minify)?;
                $buffers.push(fresh);
            }
        }
        $len = $len.saturating_add(1);
    }};
}
/// Merges the declarations of an existing `style` attribute (`current_style`) with
/// `new_styles` and writes the `;`-separated result to `writer`.
///
/// `declarations_buffer` is scratch space: entries are overwritten in place and never
/// removed here, so it can hold more entries than this call wrote. Only the first
/// `parsed_declarations_count` entries are meaningful for the current call.
///
/// A declaration from `new_styles` replaces an existing one for the same property only
/// when the new value ends with `!important` and the existing one does not. New
/// properties are written first, followed by the declarations from `current_style`.
///
/// # Errors
///
/// Returns an error when a declaration in `current_style` fails to parse or when
/// writing fails.
fn merge_styles<Wr: Write>(
writer: &mut Wr,
current_style: &StrTendril,
new_styles: &ElementStyleMap<'_>,
declarations_buffer: &mut SmallVec<[Vec<u8>; 8]>,
minify_css: bool,
) -> Result<(), InlineError> {
let mut parser_input = cssparser::ParserInput::new(current_style);
let mut parser = cssparser::Parser::new(&mut parser_input);
let mut declaration_parser = parser::CSSDeclarationListParser;
let current_declarations = cssparser::RuleBodyParser::new(&mut parser, &mut declaration_parser);
let mut parsed_declarations_count: usize = 0;
// Step 1: serialize every declaration of the existing attribute into slot `idx`.
for (idx, declaration) in current_declarations.enumerate() {
parsed_declarations_count = parsed_declarations_count.saturating_add(1);
let (property, value) = declaration?;
let estimated_declaration_size = property
.len()
.saturating_add(STYLE_SEPARATOR.len())
.saturating_add(value.len());
if let Some(buffer) = declarations_buffer.get_mut(idx) {
// Reuse an existing slot.
buffer.clear();
buffer.reserve(estimated_declaration_size);
write_declaration(buffer, &property, value, minify_css)?;
} else {
let mut buffer = Vec::with_capacity(estimated_declaration_size);
write_declaration(&mut buffer, &property, value, minify_css)?;
declarations_buffer.push(buffer);
}
}
// Slots `0..current_declarations_count` hold the attribute's own declarations; anything
// stored after that during step 2 comes from `new_styles`.
let current_declarations_count = parsed_declarations_count;
// Must be the same separator `write_declaration` used above, since it is part of the
// prefix match below.
let sep = if minify_css {
STYLE_SEPARATOR_MIN
} else {
STYLE_SEPARATOR
};
// Step 2: fold each new declaration into the buffer.
for (property, _, value) in new_styles {
match (
// `Some(..)` when the new value is marked `!important` (the marker is stripped).
value.trim_end().strip_suffix("!important"),
// An already stored declaration for the same property: the slot must start with
// the property name immediately followed by the separator.
declarations_buffer
.iter_mut()
.take(parsed_declarations_count)
.find(|style| {
style.starts_with(property.as_bytes())
&& style.get(property.len()..property.len().saturating_add(sep.len()))
== Some(sep)
}),
) {
// New value is important and the property exists: override it unless the stored
// value is itself important.
(Some(value), Some(buffer)) => {
if !buffer.ends_with(b"!important") {
// Keep `property` + separator, replace the value.
buffer.truncate(property.len().saturating_add(sep.len()));
write_declaration_value(buffer, value)?;
buffer.extend_from_slice(b" !important");
}
}
// New value is important and the property is absent: store it and re-append the
// marker that was stripped above.
(Some(value), None) => {
push_or_update!(
declarations_buffer,
parsed_declarations_count,
property,
value,
minify_css
);
// The macro has already advanced the count, so the new slot is at `count - 1`.
if let Some(buf) =
declarations_buffer.get_mut(parsed_declarations_count.saturating_sub(1))
{
buf.extend_from_slice(b" !important");
}
}
// Regular new property that is not stored yet: store it.
(None, None) => push_or_update!(
declarations_buffer,
parsed_declarations_count,
property,
value,
minify_css
),
// Regular new value for a property that is already stored: the stored one wins.
(None, Some(_)) => {}
}
}
// Step 3: write the declarations added in step 2 first, then the attribute's own.
let mut first = true;
for range in [
current_declarations_count..parsed_declarations_count,
0..current_declarations_count,
] {
for declaration in &declarations_buffer[range] {
if first {
first = false;
} else {
writer.write_all(b";")?;
}
writer.write_all(declaration)?;
}
}
Ok(())
}
#[cfg(test)]
mod tests {
    use crate::html::InliningMode;

    use super::Document;

    /// Parses `html` in document mode and serializes it back with no styles to inline,
    /// link tags dropped, minification off and dimension attributes off.
    fn roundtrip(html: &[u8], keep_style_tags: bool, at_rules: Option<&String>) -> Vec<u8> {
        let doc = Document::parse_with_options(html, 0, InliningMode::Document);
        let mut buffer = Vec::new();
        doc.serialize(
            &mut buffer,
            vec![None; doc.nodes.len()],
            keep_style_tags,
            false,
            false,
            at_rules,
            InliningMode::Document,
            false,
            false,
        )
        .expect("Should not fail");
        buffer
    }

    #[test]
    fn test_serialize() {
        let buffer = roundtrip(
            b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head>",
            true,
            None,
        );
        assert_eq!(buffer, b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head><body></body></html>");
    }

    #[test]
    fn test_skip_style_tags() {
        let buffer = roundtrip(
            b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head>",
            false,
            None,
        );
        assert_eq!(buffer, b"<html><head></head><body></body></html>");
    }

    #[test]
    fn test_escaped() {
        let buffer = roundtrip(
            b"<!DOCTYPE html><html><head><title>& < > \xC2\xA0</title></head><body></body></html>",
            false,
            None,
        );
        // `&`, `<`, `>` and U+00A0 in text must come out as entities.
        assert_eq!(buffer, b"<!DOCTYPE html><html><head><title>&amp; &lt; &gt; &nbsp;</title></head><body></body></html>");
    }

    #[test]
    fn test_untouched_style() {
        let buffer = roundtrip(
            b"<html><body><p style=\"color:blue;\"></p></body></html>",
            false,
            None,
        );
        assert_eq!(
            buffer,
            b"<html><head></head><body><p style=\"color:blue;\"></p></body></html>"
        );
    }

    #[test]
    fn test_attributes() {
        let buffer = roundtrip(
            b"<!DOCTYPE html><html><head></head><body data-foo='& \xC2\xA0 \"'></body></html>",
            false,
            None,
        );
        // `&`, U+00A0 and `"` in attribute values must come out as entities.
        assert_eq!(buffer, b"<!DOCTYPE html><html><head></head><body data-foo=\"&amp; &nbsp; &quot;\"></body></html>");
    }

    #[test]
    fn test_keep_at_rules_tags() {
        let buffer = roundtrip(
            b"<html><head><style>h1 { color:red }</style></head>",
            false,
            Some(&String::from(
                "@media (max-width: 600px) { h1 { font-size: 18px; } }",
            )),
        );
        assert_eq!(buffer, b"<html><head><style>@media (max-width: 600px) { h1 { font-size: 18px; } }</style></head><body></body></html>");
    }
}