css_inline/html/
serializer.rs

1use super::{
2    attributes::Attributes,
3    document::Document,
4    node::{ElementData, NodeData, NodeId},
5    DocumentStyleMap, InliningMode,
6};
7use crate::{html::ElementStyleMap, parser, InlineError};
8use html5ever::{local_name, ns, tendril::StrTendril, LocalName, QualName};
9use smallvec::{smallvec, SmallVec};
10use std::io::Write;
11
12#[allow(clippy::too_many_arguments)]
13pub(crate) fn serialize_to<W: Write>(
14    document: &Document,
15    writer: &mut W,
16    styles: DocumentStyleMap<'_>,
17    keep_style_tags: bool,
18    keep_link_tags: bool,
19    minify_css: bool,
20    at_rules: Option<&String>,
21    mode: InliningMode,
22) -> Result<(), InlineError> {
23    let sink = Sink::new(
24        document,
25        NodeId::document_id(),
26        keep_style_tags,
27        keep_link_tags,
28        minify_css,
29        at_rules,
30        mode,
31    );
32    let mut ser = HtmlSerializer::new(writer, styles);
33    sink.serialize(&mut ser)
34}
35
/// Intermediary structure for serializing an HTML document.
///
/// A `Sink` pairs the document with the node that is currently being serialized and
/// carries the options that decide which elements are written out.
struct Sink<'a> {
    /// Document that contains the node being serialized.
    document: &'a Document,
    /// Node this sink serializes.
    node: NodeId,
    /// When `false`, `style` elements are skipped unless `get_css_inline()` on their
    /// attributes returns `"keep"`.
    keep_style_tags: bool,
    /// When `false`, `link` elements with `rel="stylesheet"` are skipped.
    keep_link_tags: bool,
    /// Forwarded to the serializer when writing `style` attributes.
    minify_css: bool,
    /// Optional CSS text that is written as a `<style>` element right after the
    /// opening `head` tag when it is non-empty.
    at_rules: Option<&'a String>,
    /// In `InliningMode::Fragment` the `html` element is skipped.
    inlining_mode: InliningMode,
}
46
47impl<'a> Sink<'a> {
48    fn new(
49        document: &'a Document,
50        node: NodeId,
51        keep_style_tags: bool,
52        keep_link_tags: bool,
53        minify_css: bool,
54        at_rules: Option<&'a String>,
55        inlining_mode: InliningMode,
56    ) -> Sink<'a> {
57        Sink {
58            document,
59            node,
60            keep_style_tags,
61            keep_link_tags,
62            minify_css,
63            at_rules,
64            inlining_mode,
65        }
66    }
67    #[inline]
68    fn for_node(&self, node: NodeId) -> Sink<'a> {
69        Sink::new(
70            self.document,
71            node,
72            self.keep_style_tags,
73            self.keep_link_tags,
74            self.minify_css,
75            self.at_rules,
76            self.inlining_mode,
77        )
78    }
79    #[inline]
80    fn data(&self) -> &NodeData {
81        &self.document[self.node].data
82    }
83    #[inline]
84    fn should_skip_element(&self, element: &ElementData) -> bool {
85        if element.name.local == local_name!("style") {
86            !self.keep_style_tags && element.attributes.get_css_inline() != Some("keep")
87        } else if element.name.local == local_name!("link")
88            && element.attributes.get(local_name!("rel")) == Some("stylesheet")
89        {
90            !self.keep_link_tags
91        } else if element.name.local == local_name!("html") {
92            matches!(self.inlining_mode, InliningMode::Fragment)
93        } else {
94            false
95        }
96    }
97
98    fn serialize_children<W: Write>(
99        &self,
100        serializer: &mut HtmlSerializer<'_, W>,
101    ) -> Result<(), InlineError> {
102        for child in self.document.children(self.node) {
103            self.for_node(child).serialize(serializer)?;
104        }
105        Ok(())
106    }
107
108    fn serialize<W: Write>(
109        &self,
110        serializer: &mut HtmlSerializer<'_, W>,
111    ) -> Result<(), InlineError> {
112        match self.data() {
113            NodeData::Element {
114                element,
115                inlining_ignored,
116            } => {
117                if self.should_skip_element(element) {
118                    return Ok(());
119                }
120
121                let style_node_id = if *inlining_ignored {
122                    None
123                } else {
124                    Some(self.node)
125                };
126
127                serializer.start_elem(
128                    &element.name,
129                    &element.attributes,
130                    style_node_id,
131                    self.minify_css,
132                )?;
133
134                if element.name.local == local_name!("head") {
135                    if let Some(at_rules) = &self.at_rules {
136                        if !at_rules.is_empty() {
137                            serializer.write_at_rules_style(at_rules)?;
138                        }
139                    }
140                }
141
142                self.serialize_children(serializer)?;
143
144                serializer.end_elem(&element.name)?;
145                Ok(())
146            }
147            NodeData::Document => self.serialize_children(serializer),
148            NodeData::Doctype { name } => serializer.write_doctype(name),
149            NodeData::Text { text } => serializer.write_text(text),
150            NodeData::Comment { text } => serializer.write_comment(text),
151            NodeData::ProcessingInstruction { target, data } => {
152                serializer.write_processing_instruction(target, data)
153            }
154        }
155    }
156}
157
/// Per-element bookkeeping kept on the serializer stack while an element is open.
struct ElemInfo {
    /// Local name of the element when it is in the HTML namespace, `None` otherwise.
    html_name: Option<LocalName>,
    /// When set, nested elements produce no markup and no closing tag is written
    /// for this element.
    ignore_children: bool,
}
162
/// Inspired by HTML serializer from `html5ever`
/// Source: <https://github.com/servo/html5ever/blob/98d3c0cd01471af997cd60849a38da45a9414dfd/html5ever/src/serialize/mod.rs#L77>
struct HtmlSerializer<'a, Wr: Write> {
    /// Destination for the serialized bytes.
    writer: Wr,
    /// Styles to inline, looked up (and removed) by node id when an element is opened.
    styles: DocumentStyleMap<'a>,
    /// Stack of currently open elements; starts with a root entry so a parent always exists.
    stack: Vec<ElemInfo>,
    /// Scratch buffers passed to `merge_styles` and reused between calls.
    style_buffer: SmallVec<[Vec<u8>; 8]>,
}
171
172impl<'a, W: Write> HtmlSerializer<'a, W> {
173    fn new(writer: W, styles: DocumentStyleMap<'a>) -> Self {
174        let mut stack = Vec::with_capacity(8);
175        stack.push(ElemInfo {
176            html_name: None,
177            ignore_children: false,
178        });
179        HtmlSerializer {
180            writer,
181            styles,
182            stack,
183            style_buffer: smallvec![],
184        }
185    }
186
187    fn parent(&mut self) -> &mut ElemInfo {
188        self.stack.last_mut().expect("no parent ElemInfo")
189    }
190
191    fn write_escaped(&mut self, text: &str) -> Result<(), InlineError> {
192        let mut last_end = 0;
193        for (start, part) in text.match_indices(['&', '\u{00A0}', '<', '>']) {
194            self.writer.write_all(
195                text.get(last_end..start)
196                    .expect("Invalid substring")
197                    .as_bytes(),
198            )?;
199            // This is slightly faster than matching on `char`
200            // Notably, this approach does not work in `write_attributes` below
201            match (part.as_bytes()[0] & 0b0000_1110) >> 1 {
202                1 => self.writer.write_all(b"&nbsp;")?,
203                3 => self.writer.write_all(b"&amp;")?,
204                6 => self.writer.write_all(b"&lt;")?,
205                7 => self.writer.write_all(b"&gt;")?,
206                _ => unreachable!(),
207            }
208            last_end = start.checked_add(part.len()).expect("Size overflow");
209        }
210        self.writer.write_all(
211            text.get(last_end..text.len())
212                .expect("Invalid substring")
213                .as_bytes(),
214        )?;
215        Ok(())
216    }
217
218    fn write_attributes(&mut self, text: &str) -> Result<(), InlineError> {
219        let mut last_end = 0;
220        for (start, part) in text.match_indices(['&', '\u{00A0}', '"']) {
221            self.writer.write_all(
222                text.get(last_end..start)
223                    .expect("Invalid substring")
224                    .as_bytes(),
225            )?;
226            match part {
227                "&" => self.writer.write_all(b"&amp;")?,
228                "\u{00A0}" => self.writer.write_all(b"&nbsp;")?,
229                "\"" => self.writer.write_all(b"&quot;")?,
230                _ => unreachable!("Only the variants above are searched"),
231            }
232            last_end = start.checked_add(part.len()).expect("Size overflow");
233        }
234        self.writer.write_all(
235            text.get(last_end..text.len())
236                .expect("Invalid substring")
237                .as_bytes(),
238        )?;
239        Ok(())
240    }
241
242    #[allow(clippy::too_many_lines)]
243    fn start_elem(
244        &mut self,
245        name: &QualName,
246        attrs: &Attributes,
247        style_node_id: Option<NodeId>,
248        minify_css: bool,
249    ) -> Result<(), InlineError> {
250        let html_name = match name.ns {
251            ns!(html) => Some(name.local.clone()),
252            _ => None,
253        };
254
255        if self.parent().ignore_children {
256            self.stack.push(ElemInfo {
257                html_name,
258                ignore_children: true,
259            });
260            return Ok(());
261        }
262
263        let mut styles = if let Some(node_id) = style_node_id {
264            self.styles.swap_remove(&node_id).map(|mut styles| {
265                // Even though, there is a fast path for sorting of <2 elements, `indexmap` still
266                // rebuilds the hashtable unnecessarily
267                if styles.len() > 1 {
268                    styles.sort_unstable_by(|_, (a, _), _, (b, _)| a.cmp(b));
269                }
270                styles
271            })
272        } else {
273            None
274        };
275
276        self.writer.write_all(b"<")?;
277        self.writer.write_all(name.local.as_bytes())?;
278        if let Some(class) = &attrs.class {
279            self.writer.write_all(b" class=\"")?;
280            self.writer.write_all(class.value.as_bytes())?;
281            self.writer.write_all(b"\"")?;
282        }
283        for attr in &attrs.attributes {
284            self.writer.write_all(b" ")?;
285
286            match attr.name.ns {
287                ns!() => (),
288                ns!(xml) => self.writer.write_all(b"xml:")?,
289                ns!(xmlns) => {
290                    if attr.name.local != local_name!("xmlns") {
291                        self.writer.write_all(b"xmlns:")?;
292                    }
293                }
294                ns!(xlink) => self.writer.write_all(b"xlink:")?,
295                _ => {
296                    self.writer.write_all(b"unknown_namespace:")?;
297                }
298            }
299
300            self.writer.write_all(attr.name.local.as_bytes())?;
301            self.writer.write_all(b"=\"")?;
302            if attr.name.local == local_name!("style") {
303                if let Some(new_styles) = &styles {
304                    merge_styles(
305                        &mut self.writer,
306                        &attr.value,
307                        new_styles,
308                        &mut self.style_buffer,
309                        minify_css,
310                    )?;
311                    styles = None;
312                } else {
313                    self.write_attributes(&attr.value)?;
314                }
315            } else {
316                self.write_attributes(&attr.value)?;
317            }
318            self.writer.write_all(b"\"")?;
319        }
320        if let Some(styles) = styles {
321            self.writer.write_all(b" style=\"")?;
322            if minify_css {
323                let mut it = styles.iter().peekable();
324                while let Some((property, (_, value))) = it.next() {
325                    write_declaration(&mut self.writer, property, value, minify_css)?;
326                    if !minify_css || it.peek().is_some() {
327                        self.writer.write_all(b";")?;
328                    }
329                }
330            } else {
331                for (property, (_, value)) in styles {
332                    write_declaration(&mut self.writer, property, value, minify_css)?;
333                    self.writer.write_all(b";")?;
334                }
335            }
336            self.writer.write_all(b"\"")?;
337        }
338        self.writer.write_all(b">")?;
339
340        let ignore_children = name.ns == ns!(html)
341            && matches!(
342                name.local,
343                local_name!("area")
344                    | local_name!("base")
345                    | local_name!("basefont")
346                    | local_name!("bgsound")
347                    | local_name!("br")
348                    | local_name!("col")
349                    | local_name!("embed")
350                    | local_name!("frame")
351                    | local_name!("hr")
352                    | local_name!("img")
353                    | local_name!("input")
354                    | local_name!("keygen")
355                    | local_name!("link")
356                    | local_name!("meta")
357                    | local_name!("param")
358                    | local_name!("source")
359                    | local_name!("track")
360                    | local_name!("wbr")
361            );
362
363        self.stack.push(ElemInfo {
364            html_name,
365            ignore_children,
366        });
367
368        Ok(())
369    }
370
371    fn end_elem(&mut self, name: &QualName) -> Result<(), InlineError> {
372        let Some(info) = self.stack.pop() else {
373            panic!("no ElemInfo")
374        };
375        if info.ignore_children {
376            return Ok(());
377        }
378
379        self.writer.write_all(b"</")?;
380        self.writer.write_all(name.local.as_bytes())?;
381        self.writer.write_all(b">")?;
382        Ok(())
383    }
384
385    fn write_text(&mut self, text: &str) -> Result<(), InlineError> {
386        let escape = !matches!(
387            self.parent().html_name,
388            Some(
389                local_name!("style")
390                    | local_name!("script")
391                    | local_name!("xmp")
392                    | local_name!("iframe")
393                    | local_name!("noembed")
394                    | local_name!("noframes")
395                    | local_name!("plaintext")
396                    | local_name!("noscript")
397            ),
398        );
399
400        if escape {
401            self.write_escaped(text)?;
402        } else {
403            self.writer.write_all(text.as_bytes())?;
404        }
405        Ok(())
406    }
407
408    fn write_at_rules_style(&mut self, at_rules: &str) -> Result<(), InlineError> {
409        self.writer.write_all(b"<style>")?;
410        self.writer.write_all(at_rules.as_bytes())?;
411        self.writer.write_all(b"</style>")?;
412        Ok(())
413    }
414
415    fn write_comment(&mut self, text: &str) -> Result<(), InlineError> {
416        self.writer.write_all(b"<!--")?;
417        self.writer.write_all(text.as_bytes())?;
418        self.writer.write_all(b"-->")?;
419        Ok(())
420    }
421
422    fn write_doctype(&mut self, name: &str) -> Result<(), InlineError> {
423        self.writer.write_all(b"<!DOCTYPE ")?;
424        self.writer.write_all(name.as_bytes())?;
425        self.writer.write_all(b">")?;
426        Ok(())
427    }
428
429    fn write_processing_instruction(
430        &mut self,
431        target: &str,
432        data: &str,
433    ) -> Result<(), InlineError> {
434        self.writer.write_all(b"<?")?;
435        self.writer.write_all(target.as_bytes())?;
436        self.writer.write_all(b" ")?;
437        self.writer.write_all(data.as_bytes())?;
438        self.writer.write_all(b">")?;
439        Ok(())
440    }
441}
442
/// Bytes written between a property name and its value when CSS is not minified.
const STYLE_SEPARATOR: &[u8] = b": ";
/// Bytes written between a property name and its value when CSS is minified.
const STYLE_SEPARATOR_MIN: &[u8] = b":";
445
446#[inline]
447fn write_declaration<Wr: Write>(
448    writer: &mut Wr,
449    name: &str,
450    value: &str,
451    minify_css: bool,
452) -> Result<(), InlineError> {
453    writer.write_all(name.as_bytes())?;
454    if minify_css {
455        writer.write_all(STYLE_SEPARATOR_MIN)?;
456    } else {
457        writer.write_all(STYLE_SEPARATOR)?;
458    }
459    write_declaration_value(writer, value)
460}
461
462#[inline]
463fn write_declaration_value<Wr: Write>(writer: &mut Wr, value: &str) -> Result<(), InlineError> {
464    let value = value.trim();
465    // Roughly based on `str::replace`
466    let mut last_end = 0;
467    for (start, part) in value.match_indices('"') {
468        writer.write_all(
469            value
470                .get(last_end..start)
471                .expect("Invalid substring")
472                .as_bytes(),
473        )?;
474        writer.write_all(b"'")?;
475        last_end = start.checked_add(part.len()).expect("Size overflow");
476    }
477    writer.write_all(
478        value
479            .get(last_end..value.len())
480            .expect("Invalid substring")
481            .as_bytes(),
482    )?;
483    Ok(())
484}
485
/// Stores a declaration in slot `$length` of `$style_buffer` and increments `$length`.
///
/// An already existing slot is cleared and overwritten in place; otherwise a new
/// buffer is allocated and pushed. Must be used inside a function returning a
/// `Result`, because write errors are propagated with `?`.
macro_rules! push_or_update {
    ($style_buffer:expr, $length:expr, $name: expr, $value:expr, $minify_css:expr) => {{
        match $style_buffer.get_mut($length) {
            // Reuse the allocation left over from a previous run
            Some(slot) => {
                slot.clear();
                write_declaration(slot, &$name, $value, $minify_css)?;
            }
            None => {
                let trimmed = $value.trim();
                let capacity = $name
                    .len()
                    .saturating_add(STYLE_SEPARATOR.len())
                    .saturating_add(trimmed.len());
                let mut slot = Vec::with_capacity(capacity);
                write_declaration(&mut slot, &$name, $value, $minify_css)?;
                $style_buffer.push(slot);
            }
        }
        $length = $length.saturating_add(1);
    }};
}
505
506/// Merge a new set of styles into an current one, considering the rules of CSS precedence.
507///
508/// The merge process maintains the order of specificity and respects the `!important` rule in CSS.
509fn merge_styles<Wr: Write>(
510    writer: &mut Wr,
511    current_style: &StrTendril,
512    new_styles: &ElementStyleMap<'_>,
513    declarations_buffer: &mut SmallVec<[Vec<u8>; 8]>,
514    minify_css: bool,
515) -> Result<(), InlineError> {
516    // This function is designed with a focus on reusing existing allocations where possible
517    // We start by parsing the current declarations in the "style" attribute
518    let mut parser_input = cssparser::ParserInput::new(current_style);
519    let mut parser = cssparser::Parser::new(&mut parser_input);
520    let mut declaration_parser = parser::CSSDeclarationListParser;
521    let current_declarations = cssparser::RuleBodyParser::new(&mut parser, &mut declaration_parser);
522    // We manually manage the length of our buffer. The buffer may contain slots used
523    // in previous runs, and we want to access only the portion that we build in this iteration
524    let mut parsed_declarations_count: usize = 0;
525    for (idx, declaration) in current_declarations.enumerate() {
526        parsed_declarations_count = parsed_declarations_count.saturating_add(1);
527        let (property, value) = declaration?;
528        let estimated_declaration_size = property
529            .len()
530            .saturating_add(STYLE_SEPARATOR.len())
531            .saturating_add(value.len());
532        // We store the existing style declarations in the buffer for later merging with new styles
533        // If possible, we reuse existing slots in the buffer to avoid additional allocations
534        if let Some(buffer) = declarations_buffer.get_mut(idx) {
535            buffer.clear();
536            buffer.reserve(estimated_declaration_size);
537            write_declaration(buffer, &property, value, minify_css)?;
538        } else {
539            let mut buffer = Vec::with_capacity(estimated_declaration_size);
540            write_declaration(&mut buffer, &property, value, minify_css)?;
541            declarations_buffer.push(buffer);
542        }
543    }
544    // Keep the number of current declarations to write them last as they have the precedence
545    let current_declarations_count = parsed_declarations_count;
546    // Next, we iterate over the new styles and merge them into our existing set
547    // New rules will not override old ones unless they are marked as `!important`
548    for (property, (_, value)) in new_styles {
549        match (
550            value.trim_end().strip_suffix("!important"),
551            declarations_buffer
552                .iter_mut()
553                .take(parsed_declarations_count)
554                .find(|style| {
555                    style.starts_with(property.as_bytes())
556                        && style.get(property.len()..=property.len().saturating_add(1))
557                            == Some(STYLE_SEPARATOR)
558                }),
559        ) {
560            // The new rule is `!important` and there's an existing rule with the same name
561            // In this case, we override the existing rule with the new one
562            (Some(value), Some(buffer)) => {
563                // We keep the rule name and the colon-space suffix - '<rule>: `
564                buffer.truncate(property.len().saturating_add(STYLE_SEPARATOR.len()));
565                write_declaration_value(buffer, value)?;
566            }
567            // There's no existing rule with the same name, but the new rule is `!important`
568            // In this case, we add the new rule with the `!important` suffix removed
569            (Some(value), None) => {
570                push_or_update!(
571                    declarations_buffer,
572                    parsed_declarations_count,
573                    property,
574                    value,
575                    minify_css
576                );
577            }
578            // There's no existing rule with the same name, and the new rule is not `!important`
579            // In this case, we just add the new rule as-is
580            (None, None) => push_or_update!(
581                declarations_buffer,
582                parsed_declarations_count,
583                property,
584                value,
585                minify_css
586            ),
587            // Rule exists and the new one is not `!important` - leave the existing rule as-is and
588            // ignore the new one.
589            (None, Some(_)) => {}
590        }
591    }
592
593    let mut first = true;
594    for range in [
595        // First, write the new rules
596        current_declarations_count..parsed_declarations_count,
597        // Then, write the current rules
598        0..current_declarations_count,
599    ] {
600        for declaration in &declarations_buffer[range] {
601            if first {
602                first = false;
603            } else {
604                writer.write_all(b";")?;
605            }
606            writer.write_all(declaration)?;
607        }
608    }
609    Ok(())
610}
611
#[cfg(test)]
mod tests {
    use crate::html::InliningMode;

    use super::Document;
    use indexmap::IndexMap;

    /// Parses `html` in document mode and serializes it back without any styles to inline.
    ///
    /// Link tags are never kept and CSS is never minified; only the options that the
    /// tests below vary are exposed.
    fn roundtrip(html: &[u8], keep_style_tags: bool, at_rules: Option<&String>) -> Vec<u8> {
        let doc = Document::parse_with_options(html, 0, InliningMode::Document);
        let mut buffer = Vec::new();
        doc.serialize(
            &mut buffer,
            IndexMap::default(),
            keep_style_tags,
            false,
            false,
            at_rules,
            InliningMode::Document,
        )
        .expect("Should not fail");
        buffer
    }

    #[test]
    fn test_serialize() {
        let buffer = roundtrip(
            b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head>",
            true,
            None,
        );
        assert_eq!(buffer, b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head><body></body></html>");
    }

    #[test]
    fn test_skip_style_tags() {
        let buffer = roundtrip(
            b"<html><head><style>h1 { color:blue; }</style><style>h1 { color:red }</style></head>",
            false,
            None,
        );
        assert_eq!(buffer, b"<html><head></head><body></body></html>");
    }

    #[test]
    fn test_escaped() {
        let buffer = roundtrip(
            b"<!DOCTYPE html><html><head><title>& < > \xC2\xA0</title></head><body></body></html>",
            false,
            None,
        );
        assert_eq!(buffer, b"<!DOCTYPE html><html><head><title>&amp; &lt; &gt; &nbsp;</title></head><body></body></html>");
    }

    #[test]
    fn test_untouched_style() {
        let buffer = roundtrip(
            b"<html><body><p style=\"color:blue;\"></p></body></html>",
            false,
            None,
        );
        assert_eq!(
            buffer,
            b"<html><head></head><body><p style=\"color:blue;\"></p></body></html>"
        );
    }

    #[test]
    fn test_attributes() {
        let buffer = roundtrip(
            b"<!DOCTYPE html><html><head></head><body data-foo='& \xC2\xA0 \"'></body></html>",
            false,
            None,
        );
        assert_eq!(buffer, b"<!DOCTYPE html><html><head></head><body data-foo=\"&amp; &nbsp; &quot;\"></body></html>");
    }

    #[test]
    fn test_keep_at_rules_tags() {
        let at_rules = String::from("@media (max-width: 600px) { h1 { font-size: 18px; } }");
        let buffer = roundtrip(
            b"<html><head><style>h1 { color:red }</style></head>",
            false,
            Some(&at_rules),
        );
        assert_eq!(buffer, b"<html><head><style>@media (max-width: 600px) { h1 { font-size: 18px; } }</style></head><body></body></html>");
    }
}