1use super::{
2 attributes::Attributes,
3 document::Document,
4 node::{ElementData, NodeData, NodeId},
5 DocumentStyleMap, InliningMode,
6};
7use crate::{html::ElementStyleMap, parser, InlineError};
8use html5ever::{local_name, namespace_url, ns, tendril::StrTendril, LocalName, QualName};
9use smallvec::{smallvec, SmallVec};
10use std::io::Write;
11
12pub(crate) fn serialize_to<W: Write>(
13 document: &Document,
14 writer: &mut W,
15 styles: DocumentStyleMap<'_>,
16 keep_style_tags: bool,
17 keep_link_tags: bool,
18 mode: InliningMode,
19) -> Result<(), InlineError> {
20 let sink = Sink::new(
21 document,
22 NodeId::document_id(),
23 keep_style_tags,
24 keep_link_tags,
25 mode,
26 );
27 let mut ser = HtmlSerializer::new(writer, styles);
28 sink.serialize(&mut ser)
29}
30
/// Cursor over one node of a [`Document`], carrying the options that decide which
/// elements are emitted during serialization.
struct Sink<'a> {
    /// Document that owns the node being serialized.
    document: &'a Document,
    /// Node this sink is positioned at.
    node: NodeId,
    /// When `false`, `style` elements are skipped unless they carry
    /// `data-css-inline="keep"`.
    keep_style_tags: bool,
    /// When `false`, `link` elements with `rel="stylesheet"` are skipped.
    keep_link_tags: bool,
    /// In `InliningMode::Fragment` the `html` element is skipped.
    inlining_mode: InliningMode,
}
39
40impl<'a> Sink<'a> {
41 fn new(
42 document: &'a Document,
43 node: NodeId,
44 keep_style_tags: bool,
45 keep_link_tags: bool,
46 inlining_mode: InliningMode,
47 ) -> Sink<'a> {
48 Sink {
49 document,
50 node,
51 keep_style_tags,
52 keep_link_tags,
53 inlining_mode,
54 }
55 }
56 #[inline]
57 fn for_node(&self, node: NodeId) -> Sink<'a> {
58 Sink::new(
59 self.document,
60 node,
61 self.keep_style_tags,
62 self.keep_link_tags,
63 self.inlining_mode,
64 )
65 }
66 #[inline]
67 fn data(&self) -> &NodeData {
68 &self.document[self.node].data
69 }
70 #[inline]
71 fn should_skip_element(&self, element: &ElementData) -> bool {
72 if element.name.local == local_name!("style") {
73 !self.keep_style_tags
74 && element.attributes.get("data-css-inline".into()) != Some("keep")
75 } else if element.name.local == local_name!("link")
76 && element.attributes.get(local_name!("rel")) == Some("stylesheet")
77 {
78 !self.keep_link_tags
79 } else if element.name.local == local_name!("html") {
80 matches!(self.inlining_mode, InliningMode::Fragment)
81 } else {
82 false
83 }
84 }
85
86 fn serialize_children<W: Write>(
87 &self,
88 serializer: &mut HtmlSerializer<'_, W>,
89 ) -> Result<(), InlineError> {
90 for child in self.document.children(self.node) {
91 self.for_node(child).serialize(serializer)?;
92 }
93 Ok(())
94 }
95
96 fn serialize<W: Write>(
97 &self,
98 serializer: &mut HtmlSerializer<'_, W>,
99 ) -> Result<(), InlineError> {
100 match self.data() {
101 NodeData::Element {
102 element,
103 inlining_ignored,
104 } => {
105 if self.should_skip_element(element) {
106 return Ok(());
107 }
108
109 let style_node_id = if *inlining_ignored {
110 None
111 } else {
112 Some(self.node)
113 };
114
115 serializer.start_elem(&element.name, &element.attributes, style_node_id)?;
116
117 self.serialize_children(serializer)?;
118
119 serializer.end_elem(&element.name)?;
120 Ok(())
121 }
122 NodeData::Document => self.serialize_children(serializer),
123 NodeData::Doctype { name } => serializer.write_doctype(name),
124 NodeData::Text { text: content } => serializer.write_text(content),
125 NodeData::Comment { text } => serializer.write_comment(text),
126 NodeData::ProcessingInstruction { target, data } => {
127 serializer.write_processing_instruction(target, data)
128 }
129 }
130 }
131}
132
/// Per-element bookkeeping kept on the serializer's stack while an element is open.
struct ElemInfo {
    /// Local name of the element when it is in the HTML namespace, `None` otherwise.
    html_name: Option<LocalName>,
    /// `true` when nothing inside this element is written: either it is a void HTML
    /// element or one of its ancestors already ignores children.
    ignore_children: bool,
}
137
/// Streaming HTML writer that inlines styles into elements as they are emitted.
struct HtmlSerializer<'a, Wr: Write> {
    /// Destination of the serialized output.
    writer: Wr,
    /// Styles to inline, keyed by node id; an entry is removed once its element is written.
    styles: DocumentStyleMap<'a>,
    /// Currently open elements, innermost last.
    stack: Vec<ElemInfo>,
    /// Scratch buffers handed to `merge_styles`, kept between elements so their
    /// allocations can be reused.
    style_buffer: SmallVec<[Vec<u8>; 8]>,
}
146
147impl<'a, W: Write> HtmlSerializer<'a, W> {
148 fn new(writer: W, styles: DocumentStyleMap<'a>) -> Self {
149 let mut stack = Vec::with_capacity(8);
150 stack.push(ElemInfo {
151 html_name: None,
152 ignore_children: false,
153 });
154 HtmlSerializer {
155 writer,
156 styles,
157 stack,
158 style_buffer: smallvec![],
159 }
160 }
161
162 fn parent(&mut self) -> &mut ElemInfo {
163 self.stack.last_mut().expect("no parent ElemInfo")
164 }
165
166 fn write_escaped(&mut self, text: &str) -> Result<(), InlineError> {
167 let mut last_end = 0;
168 for (start, part) in text.match_indices(['&', '\u{00A0}', '<', '>']) {
169 self.writer.write_all(
170 text.get(last_end..start)
171 .expect("Invalid substring")
172 .as_bytes(),
173 )?;
174 match (part.as_bytes()[0] & 0b0000_1110) >> 1 {
177 1 => self.writer.write_all(b" ")?,
178 3 => self.writer.write_all(b"&")?,
179 6 => self.writer.write_all(b"<")?,
180 7 => self.writer.write_all(b">")?,
181 _ => unreachable!(),
182 }
183 last_end = start.checked_add(part.len()).expect("Size overflow");
184 }
185 self.writer.write_all(
186 text.get(last_end..text.len())
187 .expect("Invalid substring")
188 .as_bytes(),
189 )?;
190 Ok(())
191 }
192
193 fn write_attributes(&mut self, text: &str) -> Result<(), InlineError> {
194 let mut last_end = 0;
195 for (start, part) in text.match_indices(['&', '\u{00A0}', '"']) {
196 self.writer.write_all(
197 text.get(last_end..start)
198 .expect("Invalid substring")
199 .as_bytes(),
200 )?;
201 match part {
202 "&" => self.writer.write_all(b"&")?,
203 "\u{00A0}" => self.writer.write_all(b" ")?,
204 "\"" => self.writer.write_all(b""")?,
205 _ => unreachable!("Only the variants above are searched"),
206 }
207 last_end = start.checked_add(part.len()).expect("Size overflow");
208 }
209 self.writer.write_all(
210 text.get(last_end..text.len())
211 .expect("Invalid substring")
212 .as_bytes(),
213 )?;
214 Ok(())
215 }
216
217 fn start_elem(
218 &mut self,
219 name: &QualName,
220 attrs: &Attributes,
221 style_node_id: Option<NodeId>,
222 ) -> Result<(), InlineError> {
223 let html_name = match name.ns {
224 ns!(html) => Some(name.local.clone()),
225 _ => None,
226 };
227
228 if self.parent().ignore_children {
229 self.stack.push(ElemInfo {
230 html_name,
231 ignore_children: true,
232 });
233 return Ok(());
234 }
235
236 let mut styles = if let Some(node_id) = style_node_id {
237 self.styles.swap_remove(&node_id).map(|mut styles| {
238 styles.sort_unstable_by(|_, (a, _), _, (b, _)| a.cmp(b));
239 styles
240 })
241 } else {
242 None
243 };
244
245 self.writer.write_all(b"<")?;
246 self.writer.write_all(name.local.as_bytes())?;
247 if let Some(class) = &attrs.class {
248 self.writer.write_all(b" class=\"")?;
249 self.writer.write_all(class.value.as_bytes())?;
250 self.writer.write_all(b"\"")?;
251 }
252 for attr in &attrs.attributes {
253 self.writer.write_all(b" ")?;
254
255 match attr.name.ns {
256 ns!() => (),
257 ns!(xml) => self.writer.write_all(b"xml:")?,
258 ns!(xmlns) => {
259 if attr.name.local != local_name!("xmlns") {
260 self.writer.write_all(b"xmlns:")?;
261 }
262 }
263 ns!(xlink) => self.writer.write_all(b"xlink:")?,
264 _ => {
265 self.writer.write_all(b"unknown_namespace:")?;
266 }
267 }
268
269 self.writer.write_all(attr.name.local.as_bytes())?;
270 self.writer.write_all(b"=\"")?;
271 if attr.name.local.as_bytes() == b"style" {
272 if let Some(new_styles) = &styles {
273 merge_styles(
274 &mut self.writer,
275 &attr.value,
276 new_styles,
277 &mut self.style_buffer,
278 )?;
279 styles = None;
280 } else {
281 self.write_attributes(&attr.value)?;
282 }
283 } else {
284 self.write_attributes(&attr.value)?;
285 }
286 self.writer.write_all(b"\"")?;
287 }
288 if let Some(styles) = &styles {
289 self.writer.write_all(b" style=\"")?;
290 for (property, (_, value)) in styles {
291 write_declaration(&mut self.writer, property, value)?;
292 self.writer.write_all(b";")?;
293 }
294 self.writer.write_all(b"\"")?;
295 }
296 self.writer.write_all(b">")?;
297
298 let ignore_children = name.ns == ns!(html)
299 && matches!(
300 name.local,
301 local_name!("area")
302 | local_name!("base")
303 | local_name!("basefont")
304 | local_name!("bgsound")
305 | local_name!("br")
306 | local_name!("col")
307 | local_name!("embed")
308 | local_name!("frame")
309 | local_name!("hr")
310 | local_name!("img")
311 | local_name!("input")
312 | local_name!("keygen")
313 | local_name!("link")
314 | local_name!("meta")
315 | local_name!("param")
316 | local_name!("source")
317 | local_name!("track")
318 | local_name!("wbr")
319 );
320
321 self.stack.push(ElemInfo {
322 html_name,
323 ignore_children,
324 });
325
326 Ok(())
327 }
328
329 fn end_elem(&mut self, name: &QualName) -> Result<(), InlineError> {
330 let Some(info) = self.stack.pop() else {
331 panic!("no ElemInfo")
332 };
333 if info.ignore_children {
334 return Ok(());
335 }
336
337 self.writer.write_all(b"</")?;
338 self.writer.write_all(name.local.as_bytes())?;
339 self.writer.write_all(b">")?;
340 Ok(())
341 }
342
343 fn write_text(&mut self, text: &str) -> Result<(), InlineError> {
344 let escape = !matches!(
345 self.parent().html_name,
346 Some(
347 local_name!("style")
348 | local_name!("script")
349 | local_name!("xmp")
350 | local_name!("iframe")
351 | local_name!("noembed")
352 | local_name!("noframes")
353 | local_name!("plaintext")
354 | local_name!("noscript")
355 ),
356 );
357
358 if escape {
359 self.write_escaped(text)?;
360 } else {
361 self.writer.write_all(text.as_bytes())?;
362 }
363 Ok(())
364 }
365
366 fn write_comment(&mut self, text: &str) -> Result<(), InlineError> {
367 self.writer.write_all(b"<!--")?;
368 self.writer.write_all(text.as_bytes())?;
369 self.writer.write_all(b"-->")?;
370 Ok(())
371 }
372
373 fn write_doctype(&mut self, name: &str) -> Result<(), InlineError> {
374 self.writer.write_all(b"<!DOCTYPE ")?;
375 self.writer.write_all(name.as_bytes())?;
376 self.writer.write_all(b">")?;
377 Ok(())
378 }
379
380 fn write_processing_instruction(
381 &mut self,
382 target: &str,
383 data: &str,
384 ) -> Result<(), InlineError> {
385 self.writer.write_all(b"<?")?;
386 self.writer.write_all(target.as_bytes())?;
387 self.writer.write_all(b" ")?;
388 self.writer.write_all(data.as_bytes())?;
389 self.writer.write_all(b">")?;
390 Ok(())
391 }
392}
393
/// Bytes written between a property name and its value in a serialized declaration.
const STYLE_SEPARATOR: &[u8] = b": ";
395
396#[inline]
397fn write_declaration<Wr: Write>(
398 writer: &mut Wr,
399 name: &str,
400 value: &str,
401) -> Result<(), InlineError> {
402 writer.write_all(name.as_bytes())?;
403 writer.write_all(STYLE_SEPARATOR)?;
404 write_declaration_value(writer, value)
405}
406
407#[inline]
408fn write_declaration_value<Wr: Write>(writer: &mut Wr, value: &str) -> Result<(), InlineError> {
409 let value = value.trim();
410 if value.as_bytes().contains(&b'"') {
411 let mut last_end = 0;
413 for (start, part) in value.match_indices('"') {
414 writer.write_all(
415 value
416 .get(last_end..start)
417 .expect("Invalid substring")
418 .as_bytes(),
419 )?;
420 writer.write_all(b"'")?;
421 last_end = start.checked_add(part.len()).expect("Size overflow");
422 }
423 writer.write_all(
424 value
425 .get(last_end..value.len())
426 .expect("Invalid substring")
427 .as_bytes(),
428 )?;
429 } else {
430 writer.write_all(value.as_bytes())?;
431 }
432 Ok(())
433}
434
/// Store the declaration `$name: $value` in slot `$length` of `$style_buffer` and
/// advance `$length` by one.
///
/// An existing slot is cleared and overwritten, which reuses its allocation;
/// otherwise a new buffer is pushed. The expansion uses `?`, so the macro can only be
/// used inside a function whose error type accepts the error of `write_declaration`.
macro_rules! push_or_update {
    ($style_buffer:expr, $length:expr, $name: expr, $value:expr) => {{
        if let Some(style) = $style_buffer.get_mut($length) {
            // Reuse the slot that is already allocated at this position.
            style.clear();
            write_declaration(style, &$name, $value)?;
        } else {
            // The trimmed length is only used to size the new allocation.
            let value = $value.trim();
            let mut style = Vec::with_capacity(
                $name
                    .len()
                    .saturating_add(STYLE_SEPARATOR.len())
                    .saturating_add(value.len()),
            );
            write_declaration(&mut style, &$name, $value)?;
            $style_buffer.push(style);
        };
        $length = $length.saturating_add(1);
    }};
}
454
/// Merge `new_styles` into the declarations of an existing `style` attribute value and
/// write the result to `writer`.
///
/// Rules applied per new declaration:
/// * value ends with `!important` and the property is already present: the existing
///   value is replaced (the `!important` suffix is not written);
/// * the property is not present: the declaration is added (an `!important` suffix is
///   stripped);
/// * value is not `!important` and the property is already present: the existing
///   declaration is kept.
///
/// Added declarations are written first, followed by the declarations that came from
/// the attribute, separated by `;` with no trailing `;`.
///
/// `declarations_buffer` is scratch space; its buffers are reused between calls, so
/// it may hold more entries than are valid for the current call.
///
/// # Errors
///
/// Returns an error if `current_style` contains a declaration that fails to parse or
/// if writing fails.
fn merge_styles<Wr: Write>(
    writer: &mut Wr,
    current_style: &StrTendril,
    new_styles: &ElementStyleMap<'_>,
    declarations_buffer: &mut SmallVec<[Vec<u8>; 8]>,
) -> Result<(), InlineError> {
    // Parse the existing attribute value into individual declarations.
    let mut parser_input = cssparser::ParserInput::new(current_style);
    let mut parser = cssparser::Parser::new(&mut parser_input);
    let mut declaration_parser = parser::CSSDeclarationListParser;
    let current_declarations = cssparser::RuleBodyParser::new(&mut parser, &mut declaration_parser);
    // Number of valid slots at the front of `declarations_buffer`.
    let mut parsed_declarations_count: usize = 0;
    for (idx, declaration) in current_declarations.enumerate() {
        parsed_declarations_count = parsed_declarations_count.saturating_add(1);
        let (property, value) = declaration?;
        let estimated_declaration_size = property
            .len()
            .saturating_add(STYLE_SEPARATOR.len())
            .saturating_add(value.len());
        if let Some(buffer) = declarations_buffer.get_mut(idx) {
            // Reuse the buffer left in this slot by an earlier call.
            buffer.clear();
            buffer.reserve(estimated_declaration_size);
            write_declaration(buffer, &property, value)?;
        } else {
            let mut buffer = Vec::with_capacity(estimated_declaration_size);
            write_declaration(&mut buffer, &property, value)?;
            declarations_buffer.push(buffer);
        }
    }
    // Slots `0..current_declarations_count` hold the attribute's own declarations;
    // anything stored after that index comes from `new_styles`.
    let current_declarations_count = parsed_declarations_count;
    for (property, (_, value)) in new_styles {
        match (
            value.strip_suffix("!important"),
            // Look for a valid slot holding the same property: it must start with the
            // property name immediately followed by the separator.
            declarations_buffer
                .iter_mut()
                .take(parsed_declarations_count)
                .find(|style| {
                    style.starts_with(property.as_bytes())
                        && style.get(property.len()..=property.len().saturating_add(1))
                            == Some(STYLE_SEPARATOR)
                }),
        ) {
            // Important new value overrides the one that is already there: keep
            // `property: ` and rewrite only the value.
            (Some(value), Some(buffer)) => {
                buffer.truncate(property.len().saturating_add(STYLE_SEPARATOR.len()));
                write_declaration_value(buffer, value)?;
            }
            // Important new value for a property that is not present yet.
            (Some(value), None) => {
                push_or_update!(
                    declarations_buffer,
                    parsed_declarations_count,
                    property,
                    value
                );
            }
            // Regular new value for a property that is not present yet.
            (None, None) => push_or_update!(
                declarations_buffer,
                parsed_declarations_count,
                property,
                value
            ),
            // Regular new value never overrides an existing declaration.
            (None, Some(_)) => {}
        }
    }

    // Emit the added declarations first, then the original ones, `;`-separated.
    let mut first = true;
    for range in [
        current_declarations_count..parsed_declarations_count,
        0..current_declarations_count,
    ] {
        for declaration in &declarations_buffer[range] {
            if first {
                first = false;
            } else {
                writer.write_all(b";")?;
            }
            writer.write_all(declaration)?;
        }
    }
    Ok(())
}
557
#[cfg(test)]
mod tests {
    use crate::html::InliningMode;

    use super::Document;
    use indexmap::IndexMap;

    /// `<style>` elements are preserved when style tags are kept.
    #[test]
    fn test_serialize() {
        let doc = Document::parse_with_options(
            b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head>",
            0,
            InliningMode::Document,
        );
        let mut buffer = Vec::new();
        doc.serialize(
            &mut buffer,
            IndexMap::default(),
            true,
            false,
            InliningMode::Document,
        )
        .expect("Should not fail");
        assert_eq!(buffer, b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head><body></body></html>");
    }

    /// `<style>` elements are dropped when style tags are not kept.
    #[test]
    fn test_skip_style_tags() {
        let doc = Document::parse_with_options(
            b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head>",
            0,
            InliningMode::Document,
        );
        let mut buffer = Vec::new();
        doc.serialize(
            &mut buffer,
            IndexMap::default(),
            false,
            false,
            InliningMode::Document,
        )
        .expect("Should not fail");
        assert_eq!(buffer, b"<html><head></head><body></body></html>");
    }

    /// `&`, `<`, `>` and U+00A0 in text content are written as character references.
    #[test]
    fn test_escaped() {
        let doc = Document::parse_with_options(
            b"<!DOCTYPE html><html><head><title>&amp; &lt; &gt; \xC2\xA0</title></head><body></body></html>",
            0,
            InliningMode::Document,
        );
        let mut buffer = Vec::new();
        doc.serialize(
            &mut buffer,
            IndexMap::default(),
            false,
            false,
            InliningMode::Document,
        )
        .expect("Should not fail");
        assert_eq!(buffer, b"<!DOCTYPE html><html><head><title>&amp; &lt; &gt; &nbsp;</title></head><body></body></html>");
    }

    /// An existing `style` attribute is written unchanged when there is nothing to inline.
    #[test]
    fn test_untouched_style() {
        let doc = Document::parse_with_options(
            b"<html><body><p style=\"color:blue;\"></p></body></html>",
            0,
            InliningMode::Document,
        );
        let mut buffer = Vec::new();
        doc.serialize(
            &mut buffer,
            IndexMap::default(),
            false,
            false,
            InliningMode::Document,
        )
        .expect("Should not fail");
        assert_eq!(
            buffer,
            b"<html><head></head><body><p style=\"color:blue;\"></p></body></html>"
        );
    }

    /// `&`, U+00A0 and `"` in attribute values are written as character references.
    #[test]
    fn test_attributes() {
        let doc = Document::parse_with_options(
            b"<!DOCTYPE html><html><head></head><body data-foo='&amp; \xC2\xA0 \"'></body></html>",
            0,
            InliningMode::Document,
        );
        let mut buffer = Vec::new();
        doc.serialize(
            &mut buffer,
            IndexMap::default(),
            false,
            false,
            InliningMode::Document,
        )
        .expect("Should not fail");
        assert_eq!(buffer, b"<!DOCTYPE html><html><head></head><body data-foo=\"&amp; &nbsp; &quot;\"></body></html>");
    }
}