scraper/element_ref/
mod.rs1use std::fmt::{self, Debug};
4use std::iter::FusedIterator;
5use std::ops::Deref;
6
7use ego_tree::iter::{Edge, Traverse};
8use ego_tree::NodeRef;
9use html5ever::serialize::{serialize, SerializeOpts, TraversalScope};
10use selectors::matching::SelectorCaches;
11
12use crate::node::Element;
13use crate::{Node, Selector};
14
/// A lightweight, copyable handle to a tree node that holds an element.
///
/// The public constructor [`ElementRef::wrap`] only succeeds for nodes whose value reports
/// `is_element()`, which is what allows [`ElementRef::value`] to hand out an `&Element`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct ElementRef<'a> {
    /// The underlying tree node; also exposed through `Deref`.
    node: NodeRef<'a, Node>,
}
23
24impl<'a> ElementRef<'a> {
25 fn new(node: NodeRef<'a, Node>) -> Self {
26 ElementRef { node }
27 }
28
29 pub fn wrap(node: NodeRef<'a, Node>) -> Option<Self> {
31 if node.value().is_element() {
32 Some(ElementRef::new(node))
33 } else {
34 None
35 }
36 }
37
38 pub fn value(&self) -> &'a Element {
40 self.node.value().as_element().unwrap()
41 }
42
43 pub fn select<'b>(&self, selector: &'b Selector) -> Select<'a, 'b> {
45 let mut inner = self.traverse();
46 inner.next(); Select {
49 scope: *self,
50 inner,
51 selector,
52 caches: Default::default(),
53 }
54 }
55
56 fn serialize(&self, traversal_scope: TraversalScope) -> String {
57 let opts = SerializeOpts {
58 scripting_enabled: false, traversal_scope,
60 create_missing_parent: false,
61 };
62 let mut buf = Vec::new();
63 serialize(&mut buf, self, opts).unwrap();
64 String::from_utf8(buf).unwrap()
65 }
66
67 pub fn html(&self) -> String {
69 self.serialize(TraversalScope::IncludeNode)
70 }
71
72 pub fn inner_html(&self) -> String {
74 self.serialize(TraversalScope::ChildrenOnly(None))
75 }
76
77 pub fn attr(&self, attr: &str) -> Option<&'a str> {
79 self.value().attr(attr)
80 }
81
82 pub fn text(&self) -> Text<'a> {
84 Text {
85 inner: self.traverse(),
86 }
87 }
88
89 pub fn child_elements(&self) -> impl Iterator<Item = ElementRef<'a>> {
101 self.children().filter_map(ElementRef::wrap)
102 }
103
104 pub fn descendent_elements(&self) -> impl Iterator<Item = ElementRef<'a>> {
116 self.descendants().filter_map(ElementRef::wrap)
117 }
118}
119
120impl Debug for ElementRef<'_> {
121 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122 Debug::fmt(self.value(), f)
123 }
124}
125
126impl<'a> Deref for ElementRef<'a> {
127 type Target = NodeRef<'a, Node>;
128 fn deref(&self) -> &NodeRef<'a, Node> {
129 &self.node
130 }
131}
132
/// Iterator over the elements matching a selector, created by `ElementRef::select`.
pub struct Select<'a, 'b> {
    /// Element that `select` was called on; passed to the matcher as the scope.
    scope: ElementRef<'a>,
    /// Remaining traversal edges to examine.
    inner: Traverse<'a, Node>,
    /// Selector every candidate element is tested against.
    selector: &'b Selector,
    /// Matcher caches handed to each matching call.
    caches: SelectorCaches,
}
140
141impl Debug for Select<'_, '_> {
142 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
143 fmt.debug_struct("Select")
144 .field("scope", &self.scope)
145 .field("inner", &self.inner)
146 .field("selector", &self.selector)
147 .field("caches", &"..")
148 .finish()
149 }
150}
151
152impl Clone for Select<'_, '_> {
153 fn clone(&self) -> Self {
154 Self {
155 scope: self.scope,
156 inner: self.inner.clone(),
157 selector: self.selector,
158 caches: Default::default(),
159 }
160 }
161}
162
163impl<'a> Iterator for Select<'a, '_> {
164 type Item = ElementRef<'a>;
165
166 fn next(&mut self) -> Option<ElementRef<'a>> {
167 for edge in &mut self.inner {
168 if let Edge::Open(node) = edge {
169 if let Some(element) = ElementRef::wrap(node) {
170 if self.selector.matches_with_scope_and_cache(
171 &element,
172 Some(self.scope),
173 &mut self.caches,
174 ) {
175 return Some(element);
176 }
177 }
178 }
179 }
180 None
181 }
182}
183
// Marker impl: declares that `Select` keeps returning `None` once it has been exhausted.
// NOTE(review): this relies on the wrapped `Traverse` behaving the same way — confirm.
impl FusedIterator for Select<'_, '_> {}
185
/// Iterator over the text nodes found while traversing an element, created by
/// `ElementRef::text`.
#[derive(Debug, Clone)]
pub struct Text<'a> {
    /// Remaining traversal edges to examine.
    inner: Traverse<'a, Node>,
}
191
192impl<'a> Iterator for Text<'a> {
193 type Item = &'a str;
194
195 fn next(&mut self) -> Option<&'a str> {
196 for edge in &mut self.inner {
197 if let Edge::Open(node) = edge {
198 if let Node::Text(ref text) = node.value() {
199 return Some(&**text);
200 }
201 }
202 }
203 None
204 }
205}
206
// Marker impl: declares that `Text` keeps returning `None` once it has been exhausted.
// NOTE(review): this relies on the wrapped `Traverse` behaving the same way — confirm.
impl FusedIterator for Text<'_> {}
208
// Child modules of this module.
// NOTE(review): presumably these hold the trait impls that let `ElementRef` be matched by
// selectors (`element`) and be passed to html5ever's `serialize` (`serializable`) — confirm.
mod element;
mod serializable;
211
#[cfg(test)]
mod tests {
    use crate::html::Html;
    use crate::selector::Selector;

    /// `:scope` must refer to the element `select` was called on: of the three `<b>`
    /// elements, only the direct `<b>` child of the outer `<span>` may match.
    #[test]
    fn test_scope() {
        let markup = r"
            <div>
                <b>1</b>
                <span>
                    <span><b>2</b></span>
                    <b>3</b>
                </span>
            </div>
        ";
        let document = Html::parse_fragment(markup);
        let outer_span_selector = Selector::parse("div > span").unwrap();
        let scoped_bold_selector = Selector::parse(":scope > b").unwrap();

        let outer_span = document.select(&outer_span_selector).next().unwrap();
        let bold = outer_span.select(&scoped_bold_selector).next().unwrap();
        assert_eq!(bold.inner_html(), "3");
    }
}