1use super::tokenizer::ElementAttributeToken;
2use crate::utils::Reader;
3
/// A single attribute read from an opening tag.
///
/// Both fields borrow from the HTML source text.
#[derive(Clone, Debug, PartialEq)]
pub struct Attribute<'html> {
    /// Attribute name, i.e. the token on the left of `=`, or a bare token.
    pub key: &'html str,
    /// Attribute value: `Some` when the key was followed by `=` and a value
    /// token, `None` for a bare key.
    pub value: Option<&'html str>,
}
32
33#[derive(Debug, PartialEq, Clone, Default)]
41pub struct XHtmlElement<'html> {
42 pub name: &'html str,
44 pub id: Option<&'html str>,
46 pub class: Option<&'html str>,
48 pub attributes: &'html [Attribute<'html>],
50}
51
/// Kind of tag found at the reader's current position by `XHtmlTag::from`.
#[derive(Debug, PartialEq)]
pub enum XHtmlTag<'html> {
    /// Anything that is neither a closing tag nor a `<!...>` construct; the
    /// tag's contents are left in the reader for `XHtmlElement::from`.
    Open,
    /// A closing tag; carries the trimmed text found between `/` and `>`.
    Close(&'html str),
}
57
58impl<'html> XHtmlElement<'html> {
59 fn add_to_element(
60 &mut self,
61 attribute: Attribute<'html>,
62 attribute_tape: &mut Vec<Attribute<'html>>,
63 ) {
64 if self.name.is_empty() && attribute.value.is_none() {
65 self.name = attribute.key;
66 } else if self.class.is_none() && attribute.key == "class" && attribute.value.is_some() {
67 self.class = attribute.value;
68 } else if self.id.is_none() && attribute.key == "id" && attribute.value.is_some() {
69 self.id = attribute.value;
70 } else {
71 attribute_tape.push(attribute);
72 }
73 }
74
75 pub fn is_self_closing(&self) -> bool {
76 if matches!(
77 self.name,
78 "area"
79 | "base"
80 | "br"
81 | "col"
82 | "embed"
83 | "hr"
84 | "img"
85 | "input"
86 | "link"
87 | "meta"
88 | "param"
89 | "source"
90 | "track"
91 | "wbr"
92 ) {
93 return true;
94 }
95 if let Some(last_attribute) = self.attributes.last() {
96 return last_attribute.key == "\\";
97 }
98
99 false
100 }
101
102 pub fn clear(&mut self) {
103 self.name = "";
104 self.id = None;
105 self.class = None;
106 self.attributes = &[];
107 }
108
109 pub fn remove_attributes(&self, attribute_tape: &mut Vec<Attribute<'html>>) {
114 if self.attributes.is_empty() {
115 return;
116 }
117 let tape_ptr = attribute_tape.as_ptr();
118 let attr_range_ptr = self.attributes.as_ptr();
119 let idx = unsafe { attr_range_ptr.offset_from_unsigned(tape_ptr) };
120
121 attribute_tape.truncate(idx);
122 }
123
124 pub fn from(&mut self, reader: &mut Reader<'html>, attribute_tape: &mut Vec<Attribute<'html>>) {
125 let mut assign = false;
126 let mut key = None;
127 let start_len = attribute_tape.len();
128
129 while let Some(token) = ElementAttributeToken::next(reader) {
130 match token {
131 ElementAttributeToken::String(string_value) => match key {
132 None => {
133 debug_assert!(!assign);
134 key = Some(string_value);
135 }
136 Some(k) => {
137 if assign {
138 self.add_to_element(
139 Attribute {
140 key: k,
141 value: Some(string_value),
142 },
143 attribute_tape,
144 );
145 key = None;
146 } else {
147 self.add_to_element(
148 Attribute {
149 key: k,
150 value: None,
151 },
152 attribute_tape,
153 );
154 key = Some(string_value)
155 }
156 assign = false;
157 }
158 },
159
160 ElementAttributeToken::Equal => {
161 assign = true;
162 }
163 }
164 }
165
166 if let Some(attribute) = key {
167 self.add_to_element(
168 Attribute {
169 key: attribute,
170 value: None,
171 },
172 attribute_tape,
173 );
174 }
175
176 self.attributes = unsafe {
181 std::slice::from_raw_parts(
182 attribute_tape.as_ptr().add(start_len),
183 attribute_tape.len() - start_len,
184 )
185 };
186 }
187}
188
189impl<'a> XHtmlTag<'a> {
191 pub fn from(reader: &mut Reader<'a>) -> Option<Self> {
192 reader.next_while_list(&[b' ', b'\n', b'\r', b'\t', b'<']);
193 if let Some(character) = reader.peek() {
194 if character == b'/' {
195 let start = reader.get_position() + 1;
196 reader.next_until(b'>');
197
198 let end = reader.get_position();
199 reader.skip();
200
201 return Some(Self::Close(reader.slice(start..end).trim()));
206 } else if character == b'!' {
207 reader.next_until(b'>');
209 reader.skip();
210 return None;
211 }
212 }
213 Some(Self::Open)
214 }
215}
216
// Unit tests for attribute parsing (`XHtmlElement::from`) and tag
// classification (`XHtmlTag::from`).
//
// In every test the local `attributes` vector plays the role of the attribute
// tape. `element.attributes` is a raw-pointer-derived slice into that vector,
// so the vector has to stay alive (and unchanged) for as long as the element
// is inspected.
#[cfg(test)]
mod tests {
    use super::*;

    // Unquoted key, double-quoted value.
    #[test]
    fn test_key_no_quote_and_value_with_quote() {
        let mut reader = Reader::new("p key=\"value\"");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);
        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "key",
                value: Some("value")
            }
        );
    }

    // Unquoted key, unquoted value.
    #[test]
    fn test_key_no_quote_and_value_no_quote() {
        let mut reader = Reader::new("p key=value");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(element.attributes.len(), 1);

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "key",
                value: Some("value")
            }
        );
    }

    // Quoted key, quoted value: the quotes are not part of either string.
    #[test]
    fn test_key_with_quote_and_value_with_quote() {
        let mut reader = Reader::new("p \"key\"=\"value\"");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "key",
                value: Some("value")
            }
        );
    }

    // Several attributes in a row, mixing quoting styles and ending with a
    // bare key; order on the tape follows source order.
    #[test]
    fn test_multiple_key_value_pairs() {
        let mut reader = Reader::new("p key=\"value\" \"key1\"=value1 \"key2\"=\"value2\" keey");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "key",
                value: Some("value")
            }
        );
        assert_eq!(
            element.attributes[1],
            Attribute {
                key: "key1",
                value: Some("value1")
            }
        );
        assert_eq!(
            element.attributes[2],
            Attribute {
                key: "key2",
                value: Some("value2")
            }
        );
        assert_eq!(
            element.attributes[3],
            Attribute {
                key: "keey",
                value: None
            }
        );
    }

    // Quoted bare key: stored with `value: None`.
    #[test]
    fn test_key_with_quote_and_no_value() {
        let mut reader = Reader::new("p \"key\"");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "key",
                value: None
            }
        );
    }

    // Unquoted bare key: stored with `value: None`.
    #[test]
    fn test_key_no_quote_and_no_value() {
        let mut reader = Reader::new("p key");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "key",
                value: None
            }
        );
    }

    // Desired behavior for a backslash-escaped space in an unquoted value;
    // ignored because escapes are not implemented.
    #[test]
    #[ignore = "Known issue: Escapes are not handled"]
    fn test_key_no_quote_and_escaped_space_value() {
        let mut reader = Reader::new("p key = hello\\ world");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "key",
                value: Some("hello\\ world")
            }
        );
    }

    // A quoted key may contain spaces.
    #[test]
    fn test_long_key_with_spaces() {
        let mut reader = Reader::new("p \"long key with spaces\"=\"value\"");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "long key with spaces",
                value: Some("value")
            }
        );
    }

    // A single quote inside a double-quoted key does not end the key.
    #[test]
    fn test_long_key_with_spaces_and_different_quote_inside() {
        let mut reader = Reader::new("p \"long key's with spaces\"=\"value\"");
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "long key's with spaces",
                value: Some("value")
            }
        );
    }

    // Desired behavior for an escaped double quote inside a double-quoted key;
    // ignored because escapes are not implemented.
    #[test]
    #[ignore = "Known issue: Escapes are not handled"]
    fn test_long_key_with_spaces_and_real_same_quote_inside() {
        let mut reader = Reader::new(r#"p "long key\"s with spaces"="value""#);
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: r#"long key\"s with spaces"#,
                value: Some("value")
            }
        );
    }

    // Same as above, with escapes (quotes and runs of backslashes) in the
    // value as well; ignored because escapes are not implemented.
    #[test]
    #[ignore = "Known issue: Escapes are not handled"]
    fn test_long_key_and_value_with_spaces_and_real_same_quote_inside() {
        let mut reader = Reader::new(
            r#"p "long key\"s with spaces"="value\"s of an other person \\\\\\ \\\\\ \ \ \"""#,
        );
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "p");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: r#"long key\"s with spaces"#,
                value: Some(r#"value\"s of an other person \\\\\\ \\\\\ \ \ \""#)
            }
        );
    }

    // Realistic anchor tag: `class` is lifted into `element.class` and is
    // therefore absent from `element.attributes`, which shifts the index of
    // the attribute that follows it.
    #[test]
    fn test_valid_anchor_tag_attributes() {
        let mut reader = Reader::new(
            "a target=\"_blank\" href=\"/my_cv.pdf\" class=\"px-7 py-3\" hello-world=hello-world",
        );
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(element.name, "a");

        assert_eq!(
            element.attributes[0],
            Attribute {
                key: "target",
                value: Some("_blank")
            }
        );

        assert_eq!(
            element.attributes[1],
            Attribute {
                key: "href",
                value: Some("/my_cv.pdf")
            }
        );

        assert_eq!(element.class, Some("px-7 py-3"));

        assert_eq!(
            element.attributes[2],
            Attribute {
                key: "hello-world",
                value: Some("hello-world")
            }
        );
    }

    // Long quoted value containing `<`, `>`, commas and an apostrophe: none of
    // them may end the value early.
    #[test]
    fn test_complex_open_tag() {
        let mut reader = Reader::new(
            r#"a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/crossorigin" title="The crossorigin attribute, valid on the <audio>, <img>, <link>, <script>, and <video> elements, provides support for CORS, defining how the element handles cross-origin requests, thereby enabling the configuration of the CORS requests for the element's fetched data. Depending on the element, the attribute can be a CORS settings attribute.""#,
        );

        let tag = XHtmlTag::from(&mut reader);
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(tag, Some(XHtmlTag::Open));

        assert_eq!(
            element,
            XHtmlElement {
                name: "a",
                id: None,
                class: None,
                attributes: &[
                    Attribute {
                        key: "href",
                        value: Some(
                            "https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/crossorigin"
                        )
                    },
                    Attribute {
                        key: "title",
                        value: Some(
                            "The crossorigin attribute, valid on the <audio>, <img>, <link>, <script>, and <video> elements, provides support for CORS, defining how the element handles cross-origin requests, thereby enabling the configuration of the CORS requests for the element's fetched data. Depending on the element, the attribute can be a CORS settings attribute."
                        )
                    }
                ],
            }
        );
    }

    // `XHtmlTag::from` reports `Open` and leaves the tag contents in the
    // reader for `XHtmlElement::from`.
    #[test]
    fn test_xhtml_tag_open() {
        let mut reader = Reader::new("p key=\"value\"");
        let tag = XHtmlTag::from(&mut reader);
        let mut element = XHtmlElement::default();
        let mut attributes = vec![];
        element.from(&mut reader, &mut attributes);

        assert_eq!(tag, Some(XHtmlTag::Open));

        assert_eq!(
            element,
            XHtmlElement {
                name: "p",
                id: None,
                class: None,
                attributes: &[Attribute {
                    key: "key",
                    value: Some("value")
                }],
            }
        );
    }

    // Plain closing tag.
    #[test]
    fn test_xhtml_tag_close() {
        let mut reader = Reader::new("/p>");
        let tag = XHtmlTag::from(&mut reader);

        assert_eq!(tag, Some(XHtmlTag::Close("p")));
    }

    // Closing tag with whitespace before the slash and around the name; the
    // name is trimmed.
    #[test]
    fn test_xhtml_tag_close_weird_formatting() {
        let mut reader = Reader::new(" / p >");
        let tag = XHtmlTag::from(&mut reader);

        assert_eq!(tag, Some(XHtmlTag::Close("p")));
    }

    // A comment yields no tag at all.
    #[test]
    fn test_parsing_comment() {
        let mut reader = Reader::new("<!-- These 3 links will be selected by the selector -->");
        let tag = XHtmlTag::from(&mut reader);

        assert!(tag.is_none())
    }

    // Same as above, with the comment preceded by a newline and whitespace.
    #[test]
    fn test_parsing_mutiline_comment() {
        let mut reader = Reader::new(
            r#"
 <!-- These 3 links will be selected by the selector -->
 "#,
        );
        let tag = XHtmlTag::from(&mut reader);

        assert!(tag.is_none())
    }
}