sws_scraper/element_ref/
mod.rs1mod element;
4mod serializable;
5
6use std::iter::FusedIterator;
7use std::ops::Deref;
8
9use html5ever::serialize::{serialize, SerializeOpts, TraversalScope};
10use sws_tree::iter::{Edge, Traverse};
11use sws_tree::NodeRef;
12
13use crate::node::{Element, Node};
14use crate::selector::Selector;
15
16#[derive(Debug, Clone, PartialEq)]
21pub struct ElementRef {
22 node: NodeRef<Node>,
23}
24
25impl ElementRef {
26 fn new(node: NodeRef<Node>) -> Self {
27 ElementRef { node }
28 }
29
30 pub fn wrap(node: NodeRef<Node>) -> Option<Self> {
32 match node.map_value(|v| v.is_element()) {
33 Some(true) => Some(ElementRef::new(node)),
34 _ => None,
35 }
36 }
37
38 pub fn map_value<F, R>(&self, map_fn: F) -> Option<R>
40 where
41 F: FnOnce(&Element) -> R,
42 {
43 self.node.map_value(|v| map_fn(v.as_element().unwrap()))
44 }
45
46 pub fn select(&self, selector: Selector) -> Select {
48 let mut inner = self.traverse();
49 inner.next(); Select {
52 scope: self.clone(),
53 inner,
54 selector,
55 }
56 }
57
58 fn serialize(&self, traversal_scope: TraversalScope) -> String {
59 let opts = SerializeOpts {
60 scripting_enabled: false, traversal_scope,
62 create_missing_parent: false,
63 };
64 let mut buf = Vec::new();
65 serialize(&mut buf, self, opts).unwrap();
66 String::from_utf8(buf).unwrap()
67 }
68
69 pub fn html(&self) -> String {
71 self.serialize(TraversalScope::IncludeNode)
72 }
73
74 pub fn inner_html(&self) -> String {
76 self.serialize(TraversalScope::ChildrenOnly(None))
77 }
78
79 pub fn text(&self) -> Text {
81 Text {
82 inner: self.traverse(),
83 }
84 }
85
86 pub fn inner_text(&self) -> String {
88 let mut all_text = String::new();
89 for edge in self.traverse() {
90 if let Edge::Open(node) = edge {
91 node.map_value(|v| {
92 if let Node::Text(ref text) = v {
93 all_text.push_str(text);
94 }
95 });
96 }
97 }
98 all_text
99 }
100}
101
102impl Deref for ElementRef {
103 type Target = NodeRef<Node>;
104
105 fn deref(&self) -> &NodeRef<Node> {
106 &self.node
107 }
108}
109
110#[derive(Debug, Clone)]
112pub struct Select {
113 scope: ElementRef,
114 inner: Traverse<Node>,
115 selector: Selector,
116}
117
118impl Iterator for Select {
119 type Item = ElementRef;
120
121 fn next(&mut self) -> Option<ElementRef> {
122 for edge in &mut self.inner {
123 if let Edge::Open(node) = edge {
124 if let Some(element) = ElementRef::wrap(node) {
125 if self
126 .selector
127 .matches_with_scope(&element, Some(self.scope.clone()))
128 {
129 return Some(element);
130 }
131 }
132 }
133 }
134 None
135 }
136}
137
138impl FusedIterator for Select {}
139
140#[derive(Debug, Clone)]
142pub struct Text {
143 inner: Traverse<Node>,
144}
145
146impl Iterator for Text {
147 type Item = String;
148
149 fn next(&mut self) -> Option<String> {
150 for edge in &mut self.inner {
151 if let Edge::Open(node) = edge {
152 let text = node
153 .map_value(|v| {
154 if let Node::Text(ref text) = v {
155 Some(text.to_string())
156 } else {
157 None
158 }
159 })
160 .unwrap();
161 if text.is_some() {
162 return text;
163 }
164 }
165 }
166 None
167 }
168}
169
#[cfg(test)]
mod tests {
    use crate::html::Html;
    use crate::selector::Selector;

    /// `:scope > b` evaluated from the outer `<span>` must pick that span's
    /// direct `<b>` child (content `3`), not the `<b>` nested one level
    /// deeper nor the one that is a sibling of the span.
    #[test]
    fn test_scope() {
        let markup = r"
            <div>
                <b>1</b>
                <span>
                    <span><b>2</b></span>
                    <b>3</b>
                </span>
            </div>
        ";
        let fragment = Html::parse_fragment(markup);
        let outer_span_selector = Selector::parse("div > span").unwrap();
        let direct_bold_selector = Selector::parse(":scope > b").unwrap();

        let outer_span = fragment.select(outer_span_selector).next().unwrap();
        let direct_bold = outer_span.select(direct_bold_selector).next().unwrap();
        assert_eq!(direct_bold.inner_html(), "3");
    }
}