dom_manipulator/html/
mod.rs1use std::borrow::Cow;
4
5use ego_tree::iter::Nodes;
6use ego_tree::Tree;
7use html5ever::serialize::SerializeOpts;
8use html5ever::tree_builder::QuirksMode;
9use html5ever::QualName;
10use html5ever::{driver, serialize};
11use tendril::TendrilSink;
12
13use crate::selector::Selector;
14use crate::{ElementRef, Node};
15
16#[derive(Debug, Clone, PartialEq, Eq)]
23pub struct Html {
24 pub errors: Vec<Cow<'static, str>>,
26
27 pub quirks_mode: QuirksMode,
29
30 pub tree: Tree<Node>,
32}
33
34impl Html {
35 pub fn new_document() -> Self {
37 Html {
38 errors: Vec::new(),
39 quirks_mode: QuirksMode::NoQuirks,
40 tree: Tree::new(Node::Document),
41 }
42 }
43
44 pub fn new_fragment() -> Self {
46 Html {
47 errors: Vec::new(),
48 quirks_mode: QuirksMode::NoQuirks,
49 tree: Tree::new(Node::Fragment),
50 }
51 }
52
53 pub fn parse_document(document: &str) -> Self {
72 let parser = driver::parse_document(Self::new_document(), Default::default());
73 parser.one(document)
74 }
75
76 pub fn parse_fragment(fragment: &str) -> Self {
78 let parser = driver::parse_fragment(
79 Self::new_fragment(),
80 Default::default(),
81 QualName::new(None, ns!(html), local_name!("body")),
82 Vec::new(),
83 );
84 parser.one(fragment)
85 }
86
87 pub fn select<'a, 'b>(&'a self, selector: &'b Selector) -> Select<'a, 'b> {
89 Select {
90 inner: self.tree.nodes(),
91 selector,
92 }
93 }
94
95 pub fn root_element(&self) -> ElementRef {
97 let root_node = self
98 .tree
99 .root()
100 .children()
101 .find(|child| child.value().is_element())
102 .expect("html node missing");
103 ElementRef::wrap(root_node).unwrap()
104 }
105
106 pub fn html(&self) -> String {
108 let opts = SerializeOpts {
109 scripting_enabled: false, traversal_scope: html5ever::serialize::TraversalScope::IncludeNode,
111 create_missing_parent: false,
112 };
113 let mut buf = Vec::new();
114 serialize(&mut buf, self, opts).unwrap();
115 String::from_utf8(buf).unwrap()
116 }
117}
118
119#[derive(Debug)]
121pub struct Select<'a, 'b> {
122 inner: Nodes<'a, Node>,
123 selector: &'b Selector,
124}
125
126impl<'a, 'b> Iterator for Select<'a, 'b> {
127 type Item = ElementRef<'a>;
128
129 fn next(&mut self) -> Option<ElementRef<'a>> {
130 for node in self.inner.by_ref() {
131 if let Some(element) = ElementRef::wrap(node) {
132 if element.parent().is_some() && self.selector.matches(&element) {
133 return Some(element);
134 }
135 }
136 }
137 None
138 }
139}
140
141mod serializable;
142mod tree_sink;
143
#[cfg(test)]
mod tests {
    use super::{Html, Selector};

    /// A parsed fragment exposes a root element through which the anchor and
    /// its attribute can be reached.
    #[test]
    fn root_element_fragment() {
        let fragment = Html::parse_fragment(r#"<a href="http://github.com">1</a>"#);
        let anchor_selector = Selector::parse("a").unwrap();

        let root = fragment.root_element();
        let anchor = root.select(&anchor_selector).next().unwrap();

        assert_eq!(anchor.inner_html(), "1");
        assert_eq!(anchor.value().attr("href").unwrap(), "http://github.com");
    }

    /// A leading doctype must not prevent the root element from being found.
    #[test]
    fn root_element_document_doctype() {
        let document = Html::parse_document("<!DOCTYPE html>\n<title>abc</title>");
        let title_selector = Selector::parse("title").unwrap();

        let root = document.root_element();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// A leading comment must not prevent the root element from being found.
    #[test]
    fn root_element_document_comment() {
        let document = Html::parse_document("<!-- comment --><title>abc</title>");
        let title_selector = Selector::parse("title").unwrap();

        let root = document.root_element();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }
}