1#[cfg(test)]
2mod tests;
3
4pub fn tag_optimize<'a>(mut content: Vec<HtmlTag<'a>>) -> Vec<HtmlTag<'a>> {
28 let mut offset = 0;
29 let _ = |x| match x {
32 HtmlTag::OpeningTag(i, j) => {
33 let mut a = j
34 .iter()
35 .map(|x| {
36 if let Some(i) = x.1 {
37 format!(" {}={}", x.0, i)
38 } else {
39 format!(" {}", x.0)
40 }
41 })
42 .fold(format!("<{}", i), |a, b| {
43 let mut a = a;
44 a.push_str(&b);
45 a
46 });
47 a.push('>');
48 a
49 }
50 HtmlTag::ClosingTag(i) => format!("</{}>", i),
51 HtmlTag::Unparsable(i) => i.to_string(),
52 };
53 for i in 0..content.len() {
57 if let HtmlTag::OpeningTag(name, _) = content[i + offset] {
58 match name {
59 "area" | "base" | "br" | "col" | "embed" | "hr" | "img" | "input" | "link"
60 | "meta" | "param" | "source" | "track" | "wbr" => {
61 content.insert(i + offset + 1, HtmlTag::ClosingTag(name));
62 offset += 1;
63 }
64 "li" | "dd" | "dt" | "rt" | "rp" | "optgroup" | "tr" | "td" | "th" => {
65 if let HtmlTag::OpeningTag(name_c, _) = content[i + offset + 1] {
66 if name_c == name {
67 content.insert(i + offset + 1, HtmlTag::ClosingTag(name));
68 offset += 1;
69 }
70 }
71 }
72 "p" => {
73 if let HtmlTag::OpeningTag(name_c, _) = content[i + offset + 1] {
77 match name_c {
78 "address" | "article" | "aside" | "blockquote" | "details" | "div"
79 | "dl" | "fieldset" | "figcaption" | "figure" | "footer" | "form"
80 | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "header" | "hgroup"
81 | "hr" | "main" | "menu" | "nav" | "ol" | "p" | "pre" | "section"
82 | "table" | "ul" => {
83 content.insert(i + offset + 1, HtmlTag::ClosingTag("p"));
84 offset += 1;
85 }
86 _ => {}
87 }
88 }
89 }
90 _ => {}
91 }
92 }
93 }
94
95 content
96}
97
/// Records which of an element's tags were present in the parsed input.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum ElementTagState {
    /// Only the opening tag has been seen; `ElementContent::parse` assigns
    /// this state when it creates an element.
    OnlyStartTag,
    /// Only a closing tag was seen.
    // NOTE(review): no code visible in this file constructs this variant.
    OnlyEndTag,
    /// Both the opening and the matching closing tag were seen;
    /// `ElementContent::parse` assigns this state once an element is closed.
    BothTag,
}
/// One node of the tree built by `ElementContent::parse`.
#[derive(PartialEq, Debug)]
pub enum ElementContent<'a> {
    /// An element, boxed because `HtmlElement` in turn stores a list of
    /// `ElementContent` children.
    HtmlElement(Box<HtmlElement<'a>>),
    /// A borrowed run of source text; `ElementContent::parse` creates one for
    /// every `HtmlTag::Unparsable` token.
    LiteralContent(&'a str),
}
/// An element together with its attributes and child nodes.
#[derive(PartialEq, Debug)]
pub struct HtmlElement<'a> {
    /// Tag name, borrowed from the parsed source.
    pub name: &'a str,
    /// Attributes as `(name, value)` pairs; the value is `None` when the
    /// attribute was written without one.
    pub attributes: Vec<(&'a str, Option<&'a str>)>,
    /// Which of the element's tags were seen while building the tree.
    pub tag_state: ElementTagState,
    /// Child nodes in source order.
    pub content: Vec<ElementContent<'a>>,
}
123impl<'a> ElementContent<'a> {
124 pub fn parse(content: Vec<HtmlTag<'a>>) -> Result<Vec<Self>, ()> {
131 let mut constructed = Vec::new();
132 for i in content {
133 match i {
134 HtmlTag::OpeningTag(i, j) => {
135 constructed.push(Self::HtmlElement(Box::new(HtmlElement {
136 name: i,
137 attributes: j,
138 tag_state: ElementTagState::OnlyStartTag,
139 content: Vec::new(),
140 })))
141 }
142 HtmlTag::ClosingTag(i) => {
143 let mut tag_content = Vec::new();
144 while constructed.len() != 0 {
145 if let Self::HtmlElement(k) = &constructed[constructed.len() - 1] {
146 if k.name == i {
147 break;
148 }
149 }
150 tag_content.push(constructed.remove(constructed.len() - 1));
151 }
152 if constructed.len() == 0 {
153 return Err(());
154 }
155 let mut last_ref = if let Some(i) = constructed.last_mut() {
156 if let Self::HtmlElement(i) = i {
157 i
158 } else {
159 unsafe { core::hint::unreachable_unchecked() }
160 }
161 } else {
162 unsafe { core::hint::unreachable_unchecked() }
163 };
164 tag_content.reverse();
165 last_ref.content.append(&mut tag_content);
166 last_ref.tag_state = ElementTagState::BothTag;
167 }
168 HtmlTag::Unparsable(i) => constructed.push(Self::LiteralContent(i)),
169 }
170 }
171 Ok(constructed)
172 }
173}
174
/// A single token of tokenized HTML source.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum HtmlTag<'a> {
    /// An opening tag: the element name followed by its attributes as
    /// `(name, value)` pairs, where the value is `None` for an attribute
    /// written without one.
    OpeningTag(&'a str, Vec<(&'a str, Option<&'a str>)>),
    /// A closing tag, holding the element name.
    ClosingTag(&'a str),
    /// Source text that is kept verbatim instead of being parsed as a tag:
    /// text between tags, the inside of a `<!...>` construct, or the raw
    /// content of a `script`, `style`, `textarea` or `title` element.
    Unparsable(&'a str),
}
182impl<'a> HtmlTag<'a> {
183 pub fn parse(content: &'a str) -> Vec<Self> {
185 let mut last_splitn = 0;
186 let mut constructed = Vec::new();
187 let unparsable_content_push = |index, last_splitn, constructed: &mut Vec<_>| {
188 if last_splitn != 0 && !content[last_splitn + 1..index].trim().is_empty() {
189 constructed.push(Self::Unparsable(&content[last_splitn + 1..index]))
190 }
191 };
192 let mut ignore_parsing = None;
193 for (index, i) in content.char_indices() {
194 if i == '<' {
195 if ignore_parsing.is_none() {
196 unparsable_content_push(index, last_splitn, &mut constructed);
197 }
198 last_splitn = index;
199 } else if i == '>' {
200 let tag = &content[last_splitn..index];
201 if tag.chars().nth(0).unwrap() != '<' {
202 continue;
203 }
204 let tag = &tag[1..].trim_start();
205 let constru = if tag.chars().nth(0) == Some('/') {
206 if let Some((i, j)) = ignore_parsing {
207 if i == &tag[1..] {
208 ignore_parsing = None;
209 constructed.push(HtmlTag::Unparsable(&content[j..last_splitn]));
210 } else {
211 continue;
212 }
213 }
214 Self::ClosingTag(&tag[1..])
215 } else if tag.chars().nth(0) == Some('!') {
216 Self::Unparsable(tag)
217 } else {
218 if ignore_parsing.is_some() {
219 continue;
220 }
221 let parsed = Self::parse_opening_tag_content(tag);
222 if (parsed.0 == "script")
223 | (parsed.0 == "style")
224 | (parsed.0 == "textarea")
225 | (parsed.0 == "title")
226 {
227 ignore_parsing = Some((parsed.0, index + 1));
228 }
229 Self::OpeningTag(parsed.0, parsed.1)
230 };
231 constructed.push(constru);
232 last_splitn = index;
233 }
234 }
235 constructed
236 }
237 fn parse_opening_tag_content(content: &'a str) -> (&'a str, Vec<(&'a str, Option<&'a str>)>) {
238 let content = content.trim();
239 #[derive(PartialEq)]
240 enum QuoteStatus {
241 NoQuote,
242 SingleQuote,
243 DoubleQuote,
244 BangQuote,
245 };
246 let mut current_quotation = QuoteStatus::NoQuote;
247 let mut splitted_content = Vec::new();
248 let mut space_position = 0;
249 let mut is_empty = true;
250 let length = content.chars().count();
251 for (index, i) in content.char_indices() {
252 if i == ' ' && current_quotation == QuoteStatus::NoQuote && !is_empty {
253 if space_position != 0 {
255 space_position += 1;
256 }
257 splitted_content.push(&content[space_position..index]);
259 is_empty = true;
260 space_position = index;
261 } else if index + 1 == length {
262 splitted_content.push(&content[space_position..].trim_start());
263 space_position = index + 1;
264 } else if (i == '"') | (i == '\'') | (i == '!') {
265 current_quotation = match current_quotation {
266 QuoteStatus::NoQuote => {
267 if i == '"' {
268 QuoteStatus::DoubleQuote
269 } else if i == '\'' {
270 QuoteStatus::SingleQuote
271 } else {
272 QuoteStatus::BangQuote
273 }
274 }
275 _ => QuoteStatus::NoQuote,
276 };
277 }
278 if i != ' ' {
279 is_empty = false;
280 }
281 }
282 if splitted_content.len() == 0 {
283 return ("", Vec::new());
284 }
285 let name = splitted_content.remove(0);
286 let splitted_content = splitted_content
287 .iter_mut()
288 .map(|x| {
289 let equal_sign = x.rfind('=');
290 match equal_sign {
291 Some(i) => (
292 &x[..i],
293 Some(x[i + 1..].trim_matches(|c| (c == '"') | (c == '\''))),
294 ),
295 None => (&x[..], None),
296 }
297 })
298 .collect();
299 (name, splitted_content)
300 }
301}