1use crate::ast::ElementClosingTag;
2use crate::ast::NodeData;
3use aho_corasick::AhoCorasickBuilder;
4use aho_corasick::MatchKind;
5use minify_html_common::pattern::Replacer;
6use once_cell::sync::Lazy;
7use std::io::Write;
8
9static TEXT_REPLACER: Lazy<Replacer> = Lazy::new(|| {
10 Replacer::new(
11 AhoCorasickBuilder::new()
12 .match_kind(MatchKind::LeftmostLongest)
13 .build(vec![b"&".to_vec(), b"<".to_vec()])
14 .unwrap(),
15 vec![b"&".to_vec(), b"<".to_vec()],
16 )
17});
18static DOUBLE_QUOTED_REPLACER: Lazy<Replacer> = Lazy::new(|| {
19 Replacer::new(
20 AhoCorasickBuilder::new()
21 .match_kind(MatchKind::LeftmostLongest)
22 .build(vec![b"&".to_vec(), b"\"".to_vec()])
23 .unwrap(),
24 vec![b"&".to_vec(), b""".to_vec()],
25 )
26});
27static SINGLE_QUOTED_REPLACER: Lazy<Replacer> = Lazy::new(|| {
28 Replacer::new(
29 AhoCorasickBuilder::new()
30 .match_kind(MatchKind::LeftmostLongest)
31 .build(vec![b"&".to_vec(), b"'".to_vec()])
32 .unwrap(),
33 vec![b"&".to_vec(), b"'".to_vec()],
34 )
35});
36static UNQUOTED_REPLACER: Lazy<Replacer> = Lazy::new(|| {
37 Replacer::new(
38 AhoCorasickBuilder::new()
39 .match_kind(MatchKind::LeftmostLongest)
40 .build(vec![
41 b"&".to_vec(),
42 b">".to_vec(),
43 b"\"".to_vec(),
44 b"'".to_vec(),
45 b"\x09".to_vec(),
46 b"\x0a".to_vec(),
47 b"\x0c".to_vec(),
48 b"\x0d".to_vec(),
49 b"\x20".to_vec(),
50 ])
51 .unwrap(),
52 vec![
53 b"&".to_vec(),
54 b">".to_vec(),
55 b""".to_vec(),
56 b"'".to_vec(),
57 b"	".to_vec(),
58 b" ".to_vec(),
59 b"".to_vec(),
60 b" ".to_vec(),
61 b" ".to_vec(),
62 ],
63 )
64});
65
66pub fn c14n_serialise_ast<T: Write>(out: &mut T, node: &NodeData) -> std::io::Result<()> {
67 match node {
68 NodeData::Bang { code, .. } => {
69 out.write_all(b"<!")?;
70 out.write_all(code)?;
71 out.write_all(b">")?;
72 }
73 NodeData::Comment { code, .. } => {
74 out.write_all(b"<!--")?;
75 out.write_all(code)?;
76 out.write_all(b"-->")?;
77 }
78 NodeData::Doctype { legacy, .. } => {
79 out.write_all(b"<!DOCTYPE html")?;
80 if !legacy.is_empty() {
81 out.write_all(b" ")?;
82 out.write_all(legacy)?;
83 };
84 out.write_all(b">")?;
85 }
86 NodeData::Element {
87 attributes,
88 closing_tag,
89 children,
90 name,
91 ..
92 } => {
93 out.write_all(b"<")?;
94 out.write_all(name)?;
95 let mut attrs_sorted = attributes.iter().collect::<Vec<_>>();
96 attrs_sorted.sort_unstable_by(|a, b| a.0.cmp(b.0));
97 for (name, value) in attrs_sorted.iter() {
98 out.write_all(b" ")?;
99 out.write_all(name)?;
100 if !value.value.is_empty() {
101 out.write_all(b"=")?;
102 match value.quote {
103 Some(b'"') => {
104 out.write_all(b"\"")?;
105 out.write_all(&DOUBLE_QUOTED_REPLACER.replace_all(&value.value))?;
106 out.write_all(b"\"")?;
107 }
108 Some(b'\'') => {
109 out.write_all(b"'")?;
110 out.write_all(&SINGLE_QUOTED_REPLACER.replace_all(&value.value))?;
111 out.write_all(b"'")?;
112 }
113 None => {
114 out.write_all(&UNQUOTED_REPLACER.replace_all(&value.value))?;
115 }
116 _ => unreachable!(),
117 };
118 };
119 }
120 if closing_tag == &ElementClosingTag::SelfClosing {
121 out.write_all(b" /")?;
122 };
123 out.write_all(b">")?;
124 for c in children {
125 c14n_serialise_ast(out, c)?;
126 }
127 if closing_tag == &ElementClosingTag::Present {
128 out.write_all(b"</")?;
129 out.write_all(name)?;
130 out.write_all(b">")?;
131 };
132 }
133 NodeData::Instruction { code, .. } => {
134 out.write_all(b"<?")?;
135 out.write_all(code)?;
136 out.write_all(b"?>")?;
137 }
138 NodeData::RcdataContent { typ: _, text } => {
139 out.write_all(&TEXT_REPLACER.replace_all(text))?;
140 }
141 NodeData::ScriptOrStyleContent { code, .. } => {
142 out.write_all(code)?;
143 }
144 NodeData::Text { value } => {
145 out.write_all(&TEXT_REPLACER.replace_all(value))?;
146 }
147 NodeData::Opaque { raw_source } => {
148 out.write_all(raw_source)?;
149 }
150 };
151 Ok(())
152}