1extern crate alloc;
2
3use alloc::borrow::Cow;
4use alloc::string::String;
5use alloc::vec::Vec;
6use core::fmt;
7
8use facet_format::{
9 ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
10 ProbeStream, ScalarValue,
11};
12use html5gum::{Token, Tokenizer};
13
14pub struct HtmlParser<'de> {
19 events: Vec<ParseEvent<'de>>,
20 idx: usize,
21 pending_error: Option<HtmlError>,
22}
23
24impl<'de> HtmlParser<'de> {
25 pub fn new(input: &'de [u8]) -> Self {
27 match build_events(input) {
28 Ok(events) => Self {
29 events,
30 idx: 0,
31 pending_error: None,
32 },
33 Err(err) => Self {
34 events: Vec::new(),
35 idx: 0,
36 pending_error: Some(err),
37 },
38 }
39 }
40}
41
/// Errors reported by the HTML parser.
#[derive(Debug, Clone)]
pub enum HtmlError {
    /// The input could not be parsed; the payload is a human-readable message.
    ParseError(String),
    /// More events were required but the event stream was exhausted.
    UnexpectedEof,
    /// The input bytes are not valid UTF-8.
    InvalidUtf8,
}

impl fmt::Display for HtmlError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::ParseError(msg) => write!(f, "HTML parse error: {}", msg),
            // Fixed messages need no formatting machinery.
            Self::UnexpectedEof => f.write_str("Unexpected end of HTML"),
            Self::InvalidUtf8 => f.write_str("Invalid UTF-8 in HTML"),
        }
    }
}

// `core::error::Error` is the same trait `std` re-exports; spelled via `core` to match the
// `core`/`alloc` paths used by the rest of this file.
impl core::error::Error for HtmlError {}
64
65impl<'de> FormatParser<'de> for HtmlParser<'de> {
66 type Error = HtmlError;
67 type Probe<'a>
68 = HtmlProbe<'de>
69 where
70 Self: 'a;
71
72 fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
73 if let Some(err) = &self.pending_error {
74 return Err(err.clone());
75 }
76 if self.idx >= self.events.len() {
77 return Ok(None);
78 }
79 let event = self.events[self.idx].clone();
80 self.idx += 1;
81 Ok(Some(event))
82 }
83
84 fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
85 if let Some(err) = &self.pending_error {
86 return Err(err.clone());
87 }
88 Ok(self.events.get(self.idx).cloned())
89 }
90
91 fn skip_value(&mut self) -> Result<(), Self::Error> {
92 let mut struct_depth = 0usize;
93 let mut pending_field_value = false;
94
95 loop {
96 let event = self.next_event()?.ok_or(HtmlError::UnexpectedEof)?;
97 match event {
98 ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
99 pending_field_value = false;
100 struct_depth += 1;
101 }
102 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
103 if struct_depth == 0 {
104 break;
105 } else {
106 struct_depth -= 1;
107 if struct_depth == 0 && !pending_field_value {
108 break;
109 }
110 }
111 }
112 ParseEvent::Scalar(_) | ParseEvent::VariantTag(_) => {
113 if struct_depth == 0 && !pending_field_value {
114 break;
115 }
116 pending_field_value = false;
117 }
118 ParseEvent::FieldKey(_) | ParseEvent::OrderedField => {
119 pending_field_value = true;
120 }
121 }
122 }
123 Ok(())
124 }
125
126 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
127 let evidence = self.build_probe();
128 Ok(HtmlProbe { evidence, idx: 0 })
129 }
130}
131
132impl<'de> HtmlParser<'de> {
133 fn build_probe(&self) -> Vec<FieldEvidence<'de>> {
135 let mut evidence = Vec::new();
136
137 if self.idx >= self.events.len() {
138 return evidence;
139 }
140
141 if !matches!(
142 self.events.get(self.idx),
143 Some(ParseEvent::StructStart(ContainerKind::Element))
144 ) {
145 return evidence;
146 }
147
148 let mut i = self.idx + 1;
149 let mut depth = 0usize;
150
151 while i < self.events.len() {
152 match &self.events[i] {
153 ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
154 depth += 1;
155 i += 1;
156 }
157 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
158 if depth == 0 {
159 break;
160 }
161 depth -= 1;
162 i += 1;
163 }
164 ParseEvent::FieldKey(key) if depth == 0 => {
165 let scalar_value = if let Some(next_event) = self.events.get(i + 1) {
166 match next_event {
167 ParseEvent::Scalar(sv) => Some(sv.clone()),
168 _ => None,
169 }
170 } else {
171 None
172 };
173
174 if let Some(sv) = scalar_value {
175 evidence.push(FieldEvidence::with_scalar_value(
176 key.name.clone(),
177 key.location,
178 None,
179 sv,
180 key.namespace.clone(),
181 ));
182 } else {
183 evidence.push(FieldEvidence::new(
184 key.name.clone(),
185 key.location,
186 None,
187 key.namespace.clone(),
188 ));
189 }
190 i += 1;
191 }
192 _ => {
193 i += 1;
194 }
195 }
196 }
197
198 evidence
199 }
200}
201
202pub struct HtmlProbe<'de> {
204 evidence: Vec<FieldEvidence<'de>>,
205 idx: usize,
206}
207
208impl<'de> ProbeStream<'de> for HtmlProbe<'de> {
209 type Error = HtmlError;
210
211 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
212 if self.idx >= self.evidence.len() {
213 Ok(None)
214 } else {
215 let ev = self.evidence[self.idx].clone();
216 self.idx += 1;
217 Ok(Some(ev))
218 }
219 }
220}
221
/// One entry in an element's ordered list of children.
#[derive(Debug, Clone)]
enum ChildNode {
    /// A run of character data, already whitespace-trimmed by [`Element::push_text`].
    Text(String),
    /// A nested element.
    Element(Element),
}

/// An element of the intermediate tree built from the token stream.
#[derive(Debug, Clone)]
struct Element {
    /// Tag name.
    name: String,
    /// Attributes as `(name, value)` pairs, in the order they were supplied.
    attributes: Vec<(String, String)>,
    /// Text runs and child elements, interleaved in document order.
    children: Vec<ChildNode>,
}

impl Element {
    /// Creates an element with the given name and attributes and no children.
    fn new(name: String, attributes: Vec<(String, String)>) -> Self {
        Self {
            name,
            attributes,
            children: Vec::new(),
        }
    }

    /// Appends character data to this element.
    ///
    /// Leading and trailing ASCII whitespace is removed, and text that is empty afterwards
    /// is ignored. If the previous child is also text, the new text is joined onto it with
    /// a single space instead of creating a second text node.
    fn push_text(&mut self, text: &str) {
        // HTML only treats ASCII whitespace (space, tab, LF, FF, CR) as insignificant.
        // `str::trim` would also strip Unicode whitespace such as U+00A0, silently
        // discarding `&nbsp;` content that the author put there deliberately.
        let trimmed = text.trim_matches(|c: char| c.is_ascii_whitespace());
        if trimmed.is_empty() {
            return;
        }

        if let Some(ChildNode::Text(existing)) = self.children.last_mut() {
            existing.push(' ');
            existing.push_str(trimmed);
        } else {
            self.children.push(ChildNode::Text(String::from(trimmed)));
        }
    }

    /// Appends `child` as the last child of this element.
    fn push_child(&mut self, child: Element) {
        self.children.push(ChildNode::Element(child));
    }
}
270
/// Tag names (lowercase) of elements that are closed as soon as their start tag is seen.
const VOID_ELEMENTS: &[&str] = &[
    "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source",
    "track", "wbr",
];

/// Returns `true` if `name` is one of [`VOID_ELEMENTS`], compared ASCII case-insensitively.
fn is_void_element(name: &str) -> bool {
    VOID_ELEMENTS
        .iter()
        .any(|void_name| void_name.eq_ignore_ascii_case(name))
}
280
281fn build_events<'de>(input: &'de [u8]) -> Result<Vec<ParseEvent<'de>>, HtmlError> {
283 let input_str = core::str::from_utf8(input).map_err(|_| HtmlError::InvalidUtf8)?;
284
285 let tokenizer = Tokenizer::new(input_str);
286 let mut stack: Vec<Element> = Vec::new();
287 let mut roots: Vec<Element> = Vec::new();
288 let mut doctype_name: Option<String> = None;
289
290 for token_result in tokenizer {
291 let token = token_result.map_err(|_| HtmlError::ParseError("tokenizer error".into()))?;
292
293 match token {
294 Token::StartTag(tag) => {
295 let name = String::from_utf8_lossy(&tag.name).to_ascii_lowercase();
296 let attributes: Vec<(String, String)> = tag
297 .attributes
298 .iter()
299 .map(|(k, v)| {
300 (
301 String::from_utf8_lossy(k).into_owned(),
302 String::from_utf8_lossy(v).into_owned(),
303 )
304 })
305 .collect();
306
307 let elem = Element::new(name.clone(), attributes);
308
309 if tag.self_closing || is_void_element(&name) {
310 attach_element(&mut stack, elem, &mut roots);
312 } else {
313 stack.push(elem);
315 }
316 }
317 Token::EndTag(tag) => {
318 let end_name = String::from_utf8_lossy(&tag.name).to_ascii_lowercase();
319
320 while let Some(elem) = stack.pop() {
323 if elem.name == end_name {
324 attach_element(&mut stack, elem, &mut roots);
325 break;
326 } else {
327 attach_element(&mut stack, elem, &mut roots);
329 }
330 }
331 }
332 Token::String(text) => {
333 let text_str = String::from_utf8_lossy(&text);
334 if let Some(current) = stack.last_mut() {
335 current.push_text(&text_str);
336 }
337 }
339 Token::Doctype(doctype) => {
340 let name = String::from_utf8_lossy(&doctype.name).to_ascii_lowercase();
342 if !name.is_empty() {
343 doctype_name = Some(name);
344 }
345 }
346 Token::Comment(_) | Token::Error(_) => {
347 }
349 }
350 }
351
352 while let Some(elem) = stack.pop() {
354 attach_element(&mut stack, elem, &mut roots);
355 }
356
357 if let Some(ref doctype) = doctype_name
359 && roots.len() == 1
360 && roots[0].name == "html"
361 {
362 roots[0]
364 .attributes
365 .insert(0, ("doctype".to_string(), doctype.clone()));
366 }
367
368 let mut events = Vec::new();
370
371 if roots.is_empty() {
372 events.push(ParseEvent::StructStart(ContainerKind::Element));
374 events.push(ParseEvent::StructEnd);
375 } else if roots.len() == 1 {
376 emit_element_events(&roots[0], &mut events);
378 } else {
379 events.push(ParseEvent::StructStart(ContainerKind::Element));
381 for root in &roots {
382 let key = FieldKey::new(Cow::Owned(root.name.clone()), FieldLocationHint::Child);
383 events.push(ParseEvent::FieldKey(key));
384 emit_element_events(root, &mut events);
385 }
386 events.push(ParseEvent::StructEnd);
387 }
388
389 Ok(events)
390}
391
392fn attach_element(stack: &mut [Element], elem: Element, roots: &mut Vec<Element>) {
394 if let Some(parent) = stack.last_mut() {
395 parent.push_child(elem);
396 } else {
397 roots.push(elem);
398 }
399}
400
401fn emit_element_events<'de>(elem: &Element, events: &mut Vec<ParseEvent<'de>>) {
403 let has_attrs = !elem.attributes.is_empty();
404 let has_children = !elem.children.is_empty();
405
406 if !has_attrs && !has_children {
408 events.push(ParseEvent::StructStart(ContainerKind::Element));
409 let key = FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag);
411 events.push(ParseEvent::FieldKey(key));
412 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
413 elem.name.clone(),
414 ))));
415 events.push(ParseEvent::StructEnd);
416 return;
417 }
418
419 events.push(ParseEvent::StructStart(ContainerKind::Element));
422
423 let key = FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag);
425 events.push(ParseEvent::FieldKey(key));
426 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
427 elem.name.clone(),
428 ))));
429
430 for (name, value) in &elem.attributes {
432 let key = FieldKey::new(Cow::Owned(name.clone()), FieldLocationHint::Attribute);
433 events.push(ParseEvent::FieldKey(key));
434 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
435 value.clone(),
436 ))));
437 }
438
439 for child in &elem.children {
441 match child {
442 ChildNode::Text(text) => {
443 let key = FieldKey::new(Cow::Borrowed("_text"), FieldLocationHint::Text);
444 events.push(ParseEvent::FieldKey(key));
445 events.push(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
446 text.clone(),
447 ))));
448 }
449 ChildNode::Element(child_elem) => {
450 let key = FieldKey::new(
451 Cow::Owned(child_elem.name.clone()),
452 FieldLocationHint::Child,
453 );
454 events.push(ParseEvent::FieldKey(key));
455 emit_element_events(child_elem, events);
456 }
457 }
458 }
459
460 events.push(ParseEvent::StructEnd);
461}
462
#[cfg(test)]
mod tests {
    use super::*;
    use facet::Facet;
    use facet_format::FormatDeserializer;

    // An element with only text yields a tag field followed by a text field.
    #[test]
    fn test_simple_element() {
        let html = b"<div>hello</div>";
        let events = build_events(html).unwrap();
        assert_eq!(
            events,
            vec![
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("div".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Borrowed("_text"),
                    FieldLocationHint::Text
                )),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("hello".into()))),
                ParseEvent::StructEnd,
            ]
        );
    }

    // Attributes are emitted after the tag field and before any children.
    #[test]
    fn test_element_with_attribute() {
        let html = b"<div class=\"foo\">hello</div>";
        let events = build_events(html).unwrap();
        assert_eq!(
            events,
            vec![
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("div".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Owned("class".into()),
                    FieldLocationHint::Attribute
                )),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("foo".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Owned("_text".into()),
                    FieldLocationHint::Text
                )),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("hello".into()))),
                ParseEvent::StructEnd,
            ]
        );
    }

    // A child element becomes a field keyed by its tag name holding a nested struct.
    #[test]
    fn test_nested_elements() {
        let html = b"<div><span>inner</span></div>";
        let events = build_events(html).unwrap();
        assert_eq!(
            events,
            vec![
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("div".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Owned("span".into()),
                    FieldLocationHint::Child
                )),
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("span".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Borrowed("_text"),
                    FieldLocationHint::Text
                )),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("inner".into()))),
                ParseEvent::StructEnd,
                ParseEvent::StructEnd,
            ]
        );
    }

    // `<br>` has no end tag: it must be closed immediately so that the following `<span>`
    // is its sibling under `<div>`, not its child.
    #[test]
    fn test_void_element() {
        let html = b"<div><br><span>after</span></div>";
        let events = build_events(html).unwrap();
        assert_eq!(
            events,
            vec![
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("div".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Owned("br".into()),
                    FieldLocationHint::Child
                )),
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("br".into()))),
                ParseEvent::StructEnd,
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Owned("span".into()),
                    FieldLocationHint::Child
                )),
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("span".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Borrowed("_text"),
                    FieldLocationHint::Text
                )),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("after".into()))),
                ParseEvent::StructEnd,
                ParseEvent::StructEnd,
            ]
        );
    }

    // An attribute maps onto a struct field of the same name.
    #[test]
    fn test_deserialize_simple_struct() {
        #[derive(Debug, Facet, PartialEq)]
        struct Div {
            #[facet(default)]
            class: Option<String>,
        }

        let html = b"<div class=\"container\"></div>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Div = deserializer.deserialize().unwrap();
        assert_eq!(result.class, Some("container".into()));
    }

    // Child elements map onto nested structs; text maps onto an `xml::text` field.
    #[test]
    fn test_deserialize_nested() {
        use facet_xml as xml;

        #[derive(Debug, Facet, PartialEq)]
        struct Outer {
            #[facet(default)]
            inner: Option<Inner>,
        }

        #[derive(Debug, Facet, PartialEq)]
        struct Inner {
            #[facet(default)]
            value: Option<Value>,
        }

        #[derive(Debug, Facet, PartialEq)]
        struct Value {
            #[facet(xml::text, default)]
            text: String,
        }

        let html = b"<outer><inner><value>hello</value></inner></outer>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Outer = deserializer.deserialize().unwrap();
        assert_eq!(
            result,
            Outer {
                inner: Some(Inner {
                    value: Some(Value {
                        text: "hello".into()
                    })
                })
            }
        );
    }

    // Sibling child elements, each carrying text.
    #[test]
    fn test_deserialize_with_text() {
        use facet_xml as xml;

        #[derive(Debug, Facet, PartialEq)]
        struct Article {
            #[facet(default)]
            title: Option<TitleElement>,
            #[facet(default)]
            content: Option<ContentElement>,
        }

        #[derive(Debug, Facet, PartialEq)]
        struct TitleElement {
            #[facet(xml::text, default)]
            text: String,
        }

        #[derive(Debug, Facet, PartialEq)]
        struct ContentElement {
            #[facet(xml::text, default)]
            text: String,
        }

        let html = b"<article><title>Hello</title><content>World</content></article>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Article = deserializer.deserialize().unwrap();
        assert_eq!(
            result,
            Article {
                title: Some(TitleElement {
                    text: "Hello".into()
                }),
                content: Some(ContentElement {
                    text: "World".into()
                })
            }
        );
    }

    // Several attributes on one element.
    #[test]
    fn test_deserialize_multiple_attributes() {
        #[derive(Debug, Facet, PartialEq)]
        struct Link {
            #[facet(default)]
            href: Option<String>,
            #[facet(default)]
            target: Option<String>,
            #[facet(default)]
            rel: Option<String>,
        }

        let html = b"<a href=\"https://example.com\" target=\"_blank\" rel=\"noopener\"></a>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Link = deserializer.deserialize().unwrap();
        assert_eq!(
            result,
            Link {
                href: Some("https://example.com".into()),
                target: Some("_blank".into()),
                rel: Some("noopener".into())
            }
        );
    }

    // Predefined DOM type for a void element.
    #[test]
    fn test_deserialize_predefined_img() {
        use facet_html_dom::Img;

        let html = b"<img src=\"photo.jpg\" alt=\"A photo\" width=\"100\" height=\"200\">";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Img = deserializer.deserialize().unwrap();
        assert_eq!(result.src, Some("photo.jpg".into()));
        assert_eq!(result.alt, Some("A photo".into()));
        assert_eq!(result.width, Some("100".into()));
        assert_eq!(result.height, Some("200".into()));
    }

    // Predefined DOM type with attributes and a text child.
    #[test]
    fn test_deserialize_predefined_a() {
        use facet_html_dom::{A, PhrasingContent};

        let html = b"<a href=\"https://example.com\" target=\"_blank\">Click me</a>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: A = deserializer.deserialize().unwrap();
        assert_eq!(result.href, Some("https://example.com".into()));
        assert_eq!(result.target, Some("_blank".into()));
        assert_eq!(result.children.len(), 1);
        assert!(matches!(&result.children[0], PhrasingContent::Text(t) if t == "Click me"));
    }

    // Global attributes on a predefined DOM type.
    #[test]
    fn test_deserialize_predefined_div_with_class() {
        use facet_html_dom::{Div, FlowContent};

        let html = b"<div class=\"container\" id=\"main\">Hello World</div>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Div = deserializer.deserialize().unwrap();
        assert_eq!(result.attrs.class, Some("container".into()));
        assert_eq!(result.attrs.id, Some("main".into()));
        assert_eq!(result.children.len(), 1);
        assert!(matches!(&result.children[0], FlowContent::Text(t) if t == "Hello World"));
    }

    // Text and element children keep their document order, with each text run trimmed.
    #[test]
    fn test_mixed_content_events() {
        let html = b"<p>Hello <strong>world</strong> there</p>";
        let events = build_events(html).unwrap();

        assert_eq!(
            events,
            vec![
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("p".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Borrowed("_text"),
                    FieldLocationHint::Text
                )),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("Hello".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Owned("strong".into()),
                    FieldLocationHint::Child
                )),
                ParseEvent::StructStart(ContainerKind::Element),
                ParseEvent::FieldKey(FieldKey::new(Cow::Borrowed("_tag"), FieldLocationHint::Tag)),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("strong".into()))),
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Borrowed("_text"),
                    FieldLocationHint::Text
                )),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("world".into()))),
                ParseEvent::StructEnd,
                ParseEvent::FieldKey(FieldKey::new(
                    Cow::Borrowed("_text"),
                    FieldLocationHint::Text
                )),
                ParseEvent::Scalar(ScalarValue::Str(Cow::Owned("there".into()))),
                ParseEvent::StructEnd,
            ]
        );
    }

    // The same mixed content, deserialized into the predefined DOM types.
    #[test]
    fn test_mixed_content_deserialization() {
        use facet_html_dom::{P, PhrasingContent};

        let html = b"<p>Hello <strong>world</strong> there</p>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: P = deserializer.deserialize().unwrap();

        assert_eq!(result.children.len(), 3);
        assert!(matches!(&result.children[0], PhrasingContent::Text(t) if t == "Hello"));
        if let PhrasingContent::Strong(strong) = &result.children[1] {
            assert_eq!(strong.children.len(), 1);
            assert!(matches!(&strong.children[0], PhrasingContent::Text(t) if t == "world"));
        } else {
            panic!("Expected Strong element");
        }
        assert!(matches!(&result.children[2], PhrasingContent::Text(t) if t == "there"));
    }

    // Mixed content with more than one inline element.
    #[test]
    fn test_mixed_content_multiple_elements() {
        use facet_html_dom::{P, PhrasingContent};

        let html = b"<p>Start <strong>bold</strong> middle <em>italic</em> end</p>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: P = deserializer.deserialize().unwrap();

        assert_eq!(result.children.len(), 5);
        assert!(matches!(&result.children[0], PhrasingContent::Text(t) if t == "Start"));
        if let PhrasingContent::Strong(strong) = &result.children[1] {
            assert_eq!(strong.children.len(), 1);
            assert!(matches!(&strong.children[0], PhrasingContent::Text(t) if t == "bold"));
        } else {
            panic!("Expected Strong element");
        }
        assert!(matches!(&result.children[2], PhrasingContent::Text(t) if t == "middle"));
        if let PhrasingContent::Em(em) = &result.children[3] {
            assert_eq!(em.children.len(), 1);
            assert!(matches!(&em.children[0], PhrasingContent::Text(t) if t == "italic"));
        } else {
            panic!("Expected Em element");
        }
        assert!(matches!(&result.children[4], PhrasingContent::Text(t) if t == "end"));
    }

    // A lone void `<meta>` element as the document root.
    #[test]
    fn test_deserialize_meta_charset() {
        use facet_html_dom::Meta;

        let html = b"<meta charset=\"utf-8\">";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Meta = deserializer.deserialize().unwrap();
        assert_eq!(result.charset, Some("utf-8".into()));
    }

    // A void `<meta>` followed by a sibling inside `<head>`.
    #[test]
    fn test_deserialize_head_with_meta_charset() {
        use facet_html_dom::Head;

        let html = b"<head><meta charset=\"utf-8\"><title>Test</title></head>";
        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Head = deserializer.deserialize().unwrap();

        assert!(!result.meta.is_empty(), "Should have a meta element");
        assert_eq!(result.meta[0].charset, Some("utf-8".into()));
    }

    // A complete document with DOCTYPE, head and body, including inter-element whitespace.
    #[test]
    fn test_deserialize_full_html_document_with_meta_charset() {
        use facet_html_dom::Html;

        let html = br#"<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Test Page</title>
</head>
<body>
    <p>Hello</p>
</body>
</html>"#;

        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Html = deserializer.deserialize().unwrap();

        let head = result.head.as_ref().expect("Should have head");
        assert!(!head.meta.is_empty(), "Should have meta elements");
        assert_eq!(head.meta[0].charset, Some("utf-8".into()));

        let title = head.title.as_ref().expect("Should have title");
        assert_eq!(title.text, "Test Page");

        assert!(result.body.is_some(), "Should have body");
    }

    // The DOCTYPE name is surfaced on the `<html>` root.
    #[test]
    fn test_doctype_captured() {
        use facet_html_dom::Html;

        let html = br#"<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body></body>
</html>"#;

        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Html = deserializer.deserialize().unwrap();

        assert_eq!(
            result.doctype,
            Some("html".to_string()),
            "DOCTYPE should be captured"
        );
    }

    // Without a DOCTYPE the field stays unset.
    #[test]
    fn test_doctype_not_present() {
        use facet_html_dom::Html;

        let html = br#"<html>
<head><title>Test</title></head>
<body></body>
</html>"#;

        let parser = HtmlParser::new(html);
        let mut deserializer = FormatDeserializer::new(parser);
        let result: Html = deserializer.deserialize().unwrap();

        assert_eq!(
            result.doctype, None,
            "DOCTYPE should be None when not present"
        );
    }
}