minify_html/ast/
c14n.rs

1use crate::ast::ElementClosingTag;
2use crate::ast::NodeData;
3use aho_corasick::AhoCorasickBuilder;
4use aho_corasick::MatchKind;
5use minify_html_common::pattern::Replacer;
6use once_cell::sync::Lazy;
7use std::io::Write;
8
9static TEXT_REPLACER: Lazy<Replacer> = Lazy::new(|| {
10  Replacer::new(
11    AhoCorasickBuilder::new()
12      .match_kind(MatchKind::LeftmostLongest)
13      .build(vec![b"&".to_vec(), b"<".to_vec()])
14      .unwrap(),
15    vec![b"&amp;".to_vec(), b"&lt;".to_vec()],
16  )
17});
18static DOUBLE_QUOTED_REPLACER: Lazy<Replacer> = Lazy::new(|| {
19  Replacer::new(
20    AhoCorasickBuilder::new()
21      .match_kind(MatchKind::LeftmostLongest)
22      .build(vec![b"&".to_vec(), b"\"".to_vec()])
23      .unwrap(),
24    vec![b"&amp;".to_vec(), b"&#34;".to_vec()],
25  )
26});
27static SINGLE_QUOTED_REPLACER: Lazy<Replacer> = Lazy::new(|| {
28  Replacer::new(
29    AhoCorasickBuilder::new()
30      .match_kind(MatchKind::LeftmostLongest)
31      .build(vec![b"&".to_vec(), b"'".to_vec()])
32      .unwrap(),
33    vec![b"&amp;".to_vec(), b"&#39;".to_vec()],
34  )
35});
36static UNQUOTED_REPLACER: Lazy<Replacer> = Lazy::new(|| {
37  Replacer::new(
38    AhoCorasickBuilder::new()
39      .match_kind(MatchKind::LeftmostLongest)
40      .build(vec![
41        b"&".to_vec(),
42        b">".to_vec(),
43        b"\"".to_vec(),
44        b"'".to_vec(),
45        b"\x09".to_vec(),
46        b"\x0a".to_vec(),
47        b"\x0c".to_vec(),
48        b"\x0d".to_vec(),
49        b"\x20".to_vec(),
50      ])
51      .unwrap(),
52    vec![
53      b"&amp;".to_vec(),
54      b"&gt;".to_vec(),
55      b"&#34;".to_vec(),
56      b"&#39;".to_vec(),
57      b"&#9;".to_vec(),
58      b"&#10;".to_vec(),
59      b"&#12;".to_vec(),
60      b"&#13;".to_vec(),
61      b"&#32;".to_vec(),
62    ],
63  )
64});
65
66pub fn c14n_serialise_ast<T: Write>(out: &mut T, node: &NodeData) -> std::io::Result<()> {
67  match node {
68    NodeData::Bang { code, .. } => {
69      out.write_all(b"<!")?;
70      out.write_all(code)?;
71      out.write_all(b">")?;
72    }
73    NodeData::Comment { code, .. } => {
74      out.write_all(b"<!--")?;
75      out.write_all(code)?;
76      out.write_all(b"-->")?;
77    }
78    NodeData::Doctype { legacy, .. } => {
79      out.write_all(b"<!DOCTYPE html")?;
80      if !legacy.is_empty() {
81        out.write_all(b" ")?;
82        out.write_all(legacy)?;
83      };
84      out.write_all(b">")?;
85    }
86    NodeData::Element {
87      attributes,
88      closing_tag,
89      children,
90      name,
91      ..
92    } => {
93      out.write_all(b"<")?;
94      out.write_all(name)?;
95      let mut attrs_sorted = attributes.iter().collect::<Vec<_>>();
96      attrs_sorted.sort_unstable_by(|a, b| a.0.cmp(b.0));
97      for (name, value) in attrs_sorted.iter() {
98        out.write_all(b" ")?;
99        out.write_all(name)?;
100        if !value.value.is_empty() {
101          out.write_all(b"=")?;
102          match value.quote {
103            Some(b'"') => {
104              out.write_all(b"\"")?;
105              out.write_all(&DOUBLE_QUOTED_REPLACER.replace_all(&value.value))?;
106              out.write_all(b"\"")?;
107            }
108            Some(b'\'') => {
109              out.write_all(b"'")?;
110              out.write_all(&SINGLE_QUOTED_REPLACER.replace_all(&value.value))?;
111              out.write_all(b"'")?;
112            }
113            None => {
114              out.write_all(&UNQUOTED_REPLACER.replace_all(&value.value))?;
115            }
116            _ => unreachable!(),
117          };
118        };
119      }
120      if closing_tag == &ElementClosingTag::SelfClosing {
121        out.write_all(b" /")?;
122      };
123      out.write_all(b">")?;
124      for c in children {
125        c14n_serialise_ast(out, c)?;
126      }
127      if closing_tag == &ElementClosingTag::Present {
128        out.write_all(b"</")?;
129        out.write_all(name)?;
130        out.write_all(b">")?;
131      };
132    }
133    NodeData::Instruction { code, .. } => {
134      out.write_all(b"<?")?;
135      out.write_all(code)?;
136      out.write_all(b"?>")?;
137    }
138    NodeData::RcdataContent { typ: _, text } => {
139      out.write_all(&TEXT_REPLACER.replace_all(text))?;
140    }
141    NodeData::ScriptOrStyleContent { code, .. } => {
142      out.write_all(code)?;
143    }
144    NodeData::Text { value } => {
145      out.write_all(&TEXT_REPLACER.replace_all(value))?;
146    }
147    NodeData::Opaque { raw_source } => {
148      out.write_all(raw_source)?;
149    }
150  };
151  Ok(())
152}