1use super::{
2 attributes::Attributes,
3 document::Document,
4 node::{ElementData, NodeData, NodeId},
5 DocumentStyleMap, InliningMode,
6};
7use crate::{html::ElementStyleMap, parser, InlineError};
8use html5ever::{local_name, ns, tendril::StrTendril, LocalName, QualName};
9use smallvec::{smallvec, SmallVec};
10use std::io::Write;
11
12#[allow(clippy::too_many_arguments)]
13pub(crate) fn serialize_to<W: Write>(
14 document: &Document,
15 writer: &mut W,
16 styles: DocumentStyleMap<'_>,
17 keep_style_tags: bool,
18 keep_link_tags: bool,
19 minify_css: bool,
20 at_rules: Option<&String>,
21 mode: InliningMode,
22) -> Result<(), InlineError> {
23 let sink = Sink::new(
24 document,
25 NodeId::document_id(),
26 keep_style_tags,
27 keep_link_tags,
28 minify_css,
29 at_rules,
30 mode,
31 );
32 let mut ser = HtmlSerializer::new(writer, styles);
33 sink.serialize(&mut ser)
34}
35
/// A serialization cursor: the document, the node currently being serialized, and the
/// options shared by the whole serialization pass.
struct Sink<'a> {
    /// Document that owns the nodes being serialized.
    document: &'a Document,
    /// Node this sink serializes.
    node: NodeId,
    /// When `true`, `<style>` elements are kept in the output.
    keep_style_tags: bool,
    /// When `true`, `<link rel="stylesheet">` elements are kept in the output.
    keep_link_tags: bool,
    /// When `true`, inline declarations are written in the compact `name:value` form.
    minify_css: bool,
    /// Optional at-rules emitted in a `<style>` element inside `<head>`.
    at_rules: Option<&'a String>,
    /// In `InliningMode::Fragment` the `<html>` element is skipped.
    inlining_mode: InliningMode,
}
46
47impl<'a> Sink<'a> {
48 fn new(
49 document: &'a Document,
50 node: NodeId,
51 keep_style_tags: bool,
52 keep_link_tags: bool,
53 minify_css: bool,
54 at_rules: Option<&'a String>,
55 inlining_mode: InliningMode,
56 ) -> Sink<'a> {
57 Sink {
58 document,
59 node,
60 keep_style_tags,
61 keep_link_tags,
62 minify_css,
63 at_rules,
64 inlining_mode,
65 }
66 }
67 #[inline]
68 fn for_node(&self, node: NodeId) -> Sink<'a> {
69 Sink::new(
70 self.document,
71 node,
72 self.keep_style_tags,
73 self.keep_link_tags,
74 self.minify_css,
75 self.at_rules,
76 self.inlining_mode,
77 )
78 }
79 #[inline]
80 fn data(&self) -> &NodeData {
81 &self.document[self.node].data
82 }
83 #[inline]
84 fn should_skip_element(&self, element: &ElementData) -> bool {
85 if element.name.local == local_name!("style") {
86 !self.keep_style_tags && element.attributes.get_css_inline() != Some("keep")
87 } else if element.name.local == local_name!("link")
88 && element.attributes.get(local_name!("rel")) == Some("stylesheet")
89 {
90 !self.keep_link_tags
91 } else if element.name.local == local_name!("html") {
92 matches!(self.inlining_mode, InliningMode::Fragment)
93 } else {
94 false
95 }
96 }
97
98 fn serialize_children<W: Write>(
99 &self,
100 serializer: &mut HtmlSerializer<'_, W>,
101 ) -> Result<(), InlineError> {
102 for child in self.document.children(self.node) {
103 self.for_node(child).serialize(serializer)?;
104 }
105 Ok(())
106 }
107
108 fn serialize<W: Write>(
109 &self,
110 serializer: &mut HtmlSerializer<'_, W>,
111 ) -> Result<(), InlineError> {
112 match self.data() {
113 NodeData::Element {
114 element,
115 inlining_ignored,
116 } => {
117 if self.should_skip_element(element) {
118 return Ok(());
119 }
120
121 let style_node_id = if *inlining_ignored {
122 None
123 } else {
124 Some(self.node)
125 };
126
127 serializer.start_elem(
128 &element.name,
129 &element.attributes,
130 style_node_id,
131 self.minify_css,
132 )?;
133
134 if element.name.local == local_name!("head") {
135 if let Some(at_rules) = &self.at_rules {
136 if !at_rules.is_empty() {
137 serializer.write_at_rules_style(at_rules)?;
138 }
139 }
140 }
141
142 self.serialize_children(serializer)?;
143
144 serializer.end_elem(&element.name)?;
145 Ok(())
146 }
147 NodeData::Document => self.serialize_children(serializer),
148 NodeData::Doctype { name } => serializer.write_doctype(name),
149 NodeData::Text { text } => serializer.write_text(text),
150 NodeData::Comment { text } => serializer.write_comment(text),
151 NodeData::ProcessingInstruction { target, data } => {
152 serializer.write_processing_instruction(target, data)
153 }
154 }
155 }
156}
157
/// Per-element bookkeeping kept on the serializer stack while an element is open.
struct ElemInfo {
    /// Local name of the element when it is in the HTML namespace, `None` otherwise.
    html_name: Option<LocalName>,
    /// `true` for void elements and for anything nested under an already ignored element;
    /// such elements get no closing tag and their child elements are not written.
    ignore_children: bool,
}
162
/// Writes HTML to `writer`, inlining styles into `style` attributes as elements are opened.
struct HtmlSerializer<'a, Wr: Write> {
    /// Output destination.
    writer: Wr,
    /// Styles to inline, keyed by node; an entry is removed when its element is serialized.
    styles: DocumentStyleMap<'a>,
    /// Stack of currently open elements; the bottom entry is a sentinel for top-level nodes.
    stack: Vec<ElemInfo>,
    /// Reusable per-declaration buffers used when merging with an existing `style` attribute.
    style_buffer: SmallVec<[Vec<u8>; 8]>,
}
171
172impl<'a, W: Write> HtmlSerializer<'a, W> {
173 fn new(writer: W, styles: DocumentStyleMap<'a>) -> Self {
174 let mut stack = Vec::with_capacity(8);
175 stack.push(ElemInfo {
176 html_name: None,
177 ignore_children: false,
178 });
179 HtmlSerializer {
180 writer,
181 styles,
182 stack,
183 style_buffer: smallvec![],
184 }
185 }
186
187 fn parent(&mut self) -> &mut ElemInfo {
188 self.stack.last_mut().expect("no parent ElemInfo")
189 }
190
191 fn write_escaped(&mut self, text: &str) -> Result<(), InlineError> {
192 let mut last_end = 0;
193 for (start, part) in text.match_indices(['&', '\u{00A0}', '<', '>']) {
194 self.writer.write_all(
195 text.get(last_end..start)
196 .expect("Invalid substring")
197 .as_bytes(),
198 )?;
199 match (part.as_bytes()[0] & 0b0000_1110) >> 1 {
202 1 => self.writer.write_all(b" ")?,
203 3 => self.writer.write_all(b"&")?,
204 6 => self.writer.write_all(b"<")?,
205 7 => self.writer.write_all(b">")?,
206 _ => unreachable!(),
207 }
208 last_end = start.checked_add(part.len()).expect("Size overflow");
209 }
210 self.writer.write_all(
211 text.get(last_end..text.len())
212 .expect("Invalid substring")
213 .as_bytes(),
214 )?;
215 Ok(())
216 }
217
218 fn write_attributes(&mut self, text: &str) -> Result<(), InlineError> {
219 let mut last_end = 0;
220 for (start, part) in text.match_indices(['&', '\u{00A0}', '"']) {
221 self.writer.write_all(
222 text.get(last_end..start)
223 .expect("Invalid substring")
224 .as_bytes(),
225 )?;
226 match part {
227 "&" => self.writer.write_all(b"&")?,
228 "\u{00A0}" => self.writer.write_all(b" ")?,
229 "\"" => self.writer.write_all(b""")?,
230 _ => unreachable!("Only the variants above are searched"),
231 }
232 last_end = start.checked_add(part.len()).expect("Size overflow");
233 }
234 self.writer.write_all(
235 text.get(last_end..text.len())
236 .expect("Invalid substring")
237 .as_bytes(),
238 )?;
239 Ok(())
240 }
241
242 #[allow(clippy::too_many_lines)]
243 fn start_elem(
244 &mut self,
245 name: &QualName,
246 attrs: &Attributes,
247 style_node_id: Option<NodeId>,
248 minify_css: bool,
249 ) -> Result<(), InlineError> {
250 let html_name = match name.ns {
251 ns!(html) => Some(name.local.clone()),
252 _ => None,
253 };
254
255 if self.parent().ignore_children {
256 self.stack.push(ElemInfo {
257 html_name,
258 ignore_children: true,
259 });
260 return Ok(());
261 }
262
263 let mut styles = if let Some(node_id) = style_node_id {
264 self.styles.swap_remove(&node_id).map(|mut styles| {
265 if styles.len() > 1 {
268 styles.sort_unstable_by(|_, (a, _), _, (b, _)| a.cmp(b));
269 }
270 styles
271 })
272 } else {
273 None
274 };
275
276 self.writer.write_all(b"<")?;
277 self.writer.write_all(name.local.as_bytes())?;
278 if let Some(class) = &attrs.class {
279 self.writer.write_all(b" class=\"")?;
280 self.writer.write_all(class.value.as_bytes())?;
281 self.writer.write_all(b"\"")?;
282 }
283 for attr in &attrs.attributes {
284 self.writer.write_all(b" ")?;
285
286 match attr.name.ns {
287 ns!() => (),
288 ns!(xml) => self.writer.write_all(b"xml:")?,
289 ns!(xmlns) => {
290 if attr.name.local != local_name!("xmlns") {
291 self.writer.write_all(b"xmlns:")?;
292 }
293 }
294 ns!(xlink) => self.writer.write_all(b"xlink:")?,
295 _ => {
296 self.writer.write_all(b"unknown_namespace:")?;
297 }
298 }
299
300 self.writer.write_all(attr.name.local.as_bytes())?;
301 self.writer.write_all(b"=\"")?;
302 if attr.name.local == local_name!("style") {
303 if let Some(new_styles) = &styles {
304 merge_styles(
305 &mut self.writer,
306 &attr.value,
307 new_styles,
308 &mut self.style_buffer,
309 minify_css,
310 )?;
311 styles = None;
312 } else {
313 self.write_attributes(&attr.value)?;
314 }
315 } else {
316 self.write_attributes(&attr.value)?;
317 }
318 self.writer.write_all(b"\"")?;
319 }
320 if let Some(styles) = styles {
321 self.writer.write_all(b" style=\"")?;
322 if minify_css {
323 let mut it = styles.iter().peekable();
324 while let Some((property, (_, value))) = it.next() {
325 write_declaration(&mut self.writer, property, value, minify_css)?;
326 if !minify_css || it.peek().is_some() {
327 self.writer.write_all(b";")?;
328 }
329 }
330 } else {
331 for (property, (_, value)) in styles {
332 write_declaration(&mut self.writer, property, value, minify_css)?;
333 self.writer.write_all(b";")?;
334 }
335 }
336 self.writer.write_all(b"\"")?;
337 }
338 self.writer.write_all(b">")?;
339
340 let ignore_children = name.ns == ns!(html)
341 && matches!(
342 name.local,
343 local_name!("area")
344 | local_name!("base")
345 | local_name!("basefont")
346 | local_name!("bgsound")
347 | local_name!("br")
348 | local_name!("col")
349 | local_name!("embed")
350 | local_name!("frame")
351 | local_name!("hr")
352 | local_name!("img")
353 | local_name!("input")
354 | local_name!("keygen")
355 | local_name!("link")
356 | local_name!("meta")
357 | local_name!("param")
358 | local_name!("source")
359 | local_name!("track")
360 | local_name!("wbr")
361 );
362
363 self.stack.push(ElemInfo {
364 html_name,
365 ignore_children,
366 });
367
368 Ok(())
369 }
370
371 fn end_elem(&mut self, name: &QualName) -> Result<(), InlineError> {
372 let Some(info) = self.stack.pop() else {
373 panic!("no ElemInfo")
374 };
375 if info.ignore_children {
376 return Ok(());
377 }
378
379 self.writer.write_all(b"</")?;
380 self.writer.write_all(name.local.as_bytes())?;
381 self.writer.write_all(b">")?;
382 Ok(())
383 }
384
385 fn write_text(&mut self, text: &str) -> Result<(), InlineError> {
386 let escape = !matches!(
387 self.parent().html_name,
388 Some(
389 local_name!("style")
390 | local_name!("script")
391 | local_name!("xmp")
392 | local_name!("iframe")
393 | local_name!("noembed")
394 | local_name!("noframes")
395 | local_name!("plaintext")
396 | local_name!("noscript")
397 ),
398 );
399
400 if escape {
401 self.write_escaped(text)?;
402 } else {
403 self.writer.write_all(text.as_bytes())?;
404 }
405 Ok(())
406 }
407
408 fn write_at_rules_style(&mut self, at_rules: &str) -> Result<(), InlineError> {
409 self.writer.write_all(b"<style>")?;
410 self.writer.write_all(at_rules.as_bytes())?;
411 self.writer.write_all(b"</style>")?;
412 Ok(())
413 }
414
415 fn write_comment(&mut self, text: &str) -> Result<(), InlineError> {
416 self.writer.write_all(b"<!--")?;
417 self.writer.write_all(text.as_bytes())?;
418 self.writer.write_all(b"-->")?;
419 Ok(())
420 }
421
422 fn write_doctype(&mut self, name: &str) -> Result<(), InlineError> {
423 self.writer.write_all(b"<!DOCTYPE ")?;
424 self.writer.write_all(name.as_bytes())?;
425 self.writer.write_all(b">")?;
426 Ok(())
427 }
428
429 fn write_processing_instruction(
430 &mut self,
431 target: &str,
432 data: &str,
433 ) -> Result<(), InlineError> {
434 self.writer.write_all(b"<?")?;
435 self.writer.write_all(target.as_bytes())?;
436 self.writer.write_all(b" ")?;
437 self.writer.write_all(data.as_bytes())?;
438 self.writer.write_all(b">")?;
439 Ok(())
440 }
441}
442
/// Separator between a property name and its value in regular (non-minified) output.
const STYLE_SEPARATOR: &[u8] = b": ";
/// Separator between a property name and its value when CSS is minified.
const STYLE_SEPARATOR_MIN: &[u8] = b":";
445
446#[inline]
447fn write_declaration<Wr: Write>(
448 writer: &mut Wr,
449 name: &str,
450 value: &str,
451 minify_css: bool,
452) -> Result<(), InlineError> {
453 writer.write_all(name.as_bytes())?;
454 if minify_css {
455 writer.write_all(STYLE_SEPARATOR_MIN)?;
456 } else {
457 writer.write_all(STYLE_SEPARATOR)?;
458 }
459 write_declaration_value(writer, value)
460}
461
462#[inline]
463fn write_declaration_value<Wr: Write>(writer: &mut Wr, value: &str) -> Result<(), InlineError> {
464 let value = value.trim();
465 let mut last_end = 0;
467 for (start, part) in value.match_indices('"') {
468 writer.write_all(
469 value
470 .get(last_end..start)
471 .expect("Invalid substring")
472 .as_bytes(),
473 )?;
474 writer.write_all(b"'")?;
475 last_end = start.checked_add(part.len()).expect("Size overflow");
476 }
477 writer.write_all(
478 value
479 .get(last_end..value.len())
480 .expect("Invalid substring")
481 .as_bytes(),
482 )?;
483 Ok(())
484}
485
/// Stores the declaration `$name` / `$value` at index `$length` of `$style_buffer`, reusing
/// the buffer already at that index when there is one and pushing a new buffer otherwise,
/// then increments `$length`.
///
/// Expands to code that uses `?`, so it can only be used inside a function returning a
/// compatible `Result`.
macro_rules! push_or_update {
    ($style_buffer:expr, $length:expr, $name: expr, $value:expr, $minify_css:expr) => {{
        match $style_buffer.get_mut($length) {
            Some(slot) => {
                slot.clear();
                write_declaration(slot, &$name, $value, $minify_css)?;
            }
            None => {
                let capacity = $name
                    .len()
                    .saturating_add(STYLE_SEPARATOR.len())
                    .saturating_add($value.trim().len());
                let mut fresh = Vec::with_capacity(capacity);
                write_declaration(&mut fresh, &$name, $value, $minify_css)?;
                $style_buffer.push(fresh);
            }
        }
        $length = $length.saturating_add(1);
    }};
}
505
506fn merge_styles<Wr: Write>(
510 writer: &mut Wr,
511 current_style: &StrTendril,
512 new_styles: &ElementStyleMap<'_>,
513 declarations_buffer: &mut SmallVec<[Vec<u8>; 8]>,
514 minify_css: bool,
515) -> Result<(), InlineError> {
516 let mut parser_input = cssparser::ParserInput::new(current_style);
519 let mut parser = cssparser::Parser::new(&mut parser_input);
520 let mut declaration_parser = parser::CSSDeclarationListParser;
521 let current_declarations = cssparser::RuleBodyParser::new(&mut parser, &mut declaration_parser);
522 let mut parsed_declarations_count: usize = 0;
525 for (idx, declaration) in current_declarations.enumerate() {
526 parsed_declarations_count = parsed_declarations_count.saturating_add(1);
527 let (property, value) = declaration?;
528 let estimated_declaration_size = property
529 .len()
530 .saturating_add(STYLE_SEPARATOR.len())
531 .saturating_add(value.len());
532 if let Some(buffer) = declarations_buffer.get_mut(idx) {
535 buffer.clear();
536 buffer.reserve(estimated_declaration_size);
537 write_declaration(buffer, &property, value, minify_css)?;
538 } else {
539 let mut buffer = Vec::with_capacity(estimated_declaration_size);
540 write_declaration(&mut buffer, &property, value, minify_css)?;
541 declarations_buffer.push(buffer);
542 }
543 }
544 let current_declarations_count = parsed_declarations_count;
546 for (property, (_, value)) in new_styles {
549 match (
550 value.trim_end().strip_suffix("!important"),
551 declarations_buffer
552 .iter_mut()
553 .take(parsed_declarations_count)
554 .find(|style| {
555 style.starts_with(property.as_bytes())
556 && style.get(property.len()..=property.len().saturating_add(1))
557 == Some(STYLE_SEPARATOR)
558 }),
559 ) {
560 (Some(value), Some(buffer)) => {
563 buffer.truncate(property.len().saturating_add(STYLE_SEPARATOR.len()));
565 write_declaration_value(buffer, value)?;
566 }
567 (Some(value), None) => {
570 push_or_update!(
571 declarations_buffer,
572 parsed_declarations_count,
573 property,
574 value,
575 minify_css
576 );
577 }
578 (None, None) => push_or_update!(
581 declarations_buffer,
582 parsed_declarations_count,
583 property,
584 value,
585 minify_css
586 ),
587 (None, Some(_)) => {}
590 }
591 }
592
593 let mut first = true;
594 for range in [
595 current_declarations_count..parsed_declarations_count,
597 0..current_declarations_count,
599 ] {
600 for declaration in &declarations_buffer[range] {
601 if first {
602 first = false;
603 } else {
604 writer.write_all(b";")?;
605 }
606 writer.write_all(declaration)?;
607 }
608 }
609 Ok(())
610}
611
#[cfg(test)]
mod tests {
    use crate::html::InliningMode;

    use super::Document;
    use indexmap::IndexMap;

    /// Parses `input` as a full document and serializes it with no styles to inline and
    /// without keeping `<link>` tags or minifying CSS.
    fn serialize_document(
        input: &[u8],
        keep_style_tags: bool,
        at_rules: Option<&String>,
    ) -> Vec<u8> {
        let doc = Document::parse_with_options(input, 0, InliningMode::Document);
        let mut buffer = Vec::new();
        doc.serialize(
            &mut buffer,
            IndexMap::default(),
            keep_style_tags,
            false,
            false,
            at_rules,
            InliningMode::Document,
        )
        .expect("Should not fail");
        buffer
    }

    #[test]
    fn test_serialize() {
        let buffer = serialize_document(
            b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head>",
            true,
            None,
        );
        assert_eq!(buffer, b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head><body></body></html>");
    }

    #[test]
    fn test_skip_style_tags() {
        let buffer = serialize_document(
            b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head>",
            false,
            None,
        );
        assert_eq!(buffer, b"<html><head></head><body></body></html>");
    }

    #[test]
    fn test_escaped() {
        let buffer = serialize_document(
            b"<!DOCTYPE html><html><head><title>& < > \xC2\xA0</title></head><body></body></html>",
            false,
            None,
        );
        // Text content must come out with character references, not raw characters.
        assert_eq!(buffer, b"<!DOCTYPE html><html><head><title>&amp; &lt; &gt; &nbsp;</title></head><body></body></html>");
    }

    #[test]
    fn test_untouched_style() {
        let buffer = serialize_document(
            b"<html><body><p style=\"color:blue;\"></p></body></html>",
            false,
            None,
        );
        assert_eq!(
            buffer,
            b"<html><head></head><body><p style=\"color:blue;\"></p></body></html>"
        );
    }

    #[test]
    fn test_attributes() {
        let buffer = serialize_document(
            b"<!DOCTYPE html><html><head></head><body data-foo='& \xC2\xA0 \"'></body></html>",
            false,
            None,
        );
        // Attribute values must escape `&`, U+00A0 and `"`.
        assert_eq!(buffer, b"<!DOCTYPE html><html><head></head><body data-foo=\"&amp; &nbsp; &quot;\"></body></html>");
    }

    #[test]
    fn test_keep_at_rules_tags() {
        let buffer = serialize_document(
            b"<html><head><style>h1 { color:red }</style></head>",
            false,
            Some(&String::from(
                "@media (max-width: 600px) { h1 { font-size: 18px; } }",
            )),
        );
        assert_eq!(buffer, b"<html><head><style>@media (max-width: 600px) { h1 { font-size: 18px; } }</style></head><body></body></html>");
    }
}
749}