reqwest_scraper/
css_selector.rs1use crate::error::Result;
4use itertools::Itertools;
5use scraper::ElementRef;
6
7pub struct Html {
9 value: scraper::Html,
10}
11
12impl Html {
13 pub fn new(html_str: &str) -> Self {
15 Self {
16 value: scraper::Html::parse_fragment(html_str),
17 }
18 }
19 pub fn select(&self, selector: &str) -> Result<Selectable<scraper::Html>> {
21 Selectable::wrap(selector, &self.value)
22 }
23}
24
25pub struct Selectable<'a, T> {
27 selector: scraper::Selector,
28 node: &'a T,
29}
30
31pub struct HtmlSelectIterator<'a, 'b> {
33 select: scraper::html::Select<'a, 'b>,
34}
35
36pub struct ElementSelectIterator<'a, 'b> {
38 select: scraper::element_ref::Select<'a, 'b>,
39}
40
41pub struct SelectItem<'a> {
43 element: ElementRef<'a>,
44}
45
46impl<'a, T> Selectable<'a, T> {
47 fn wrap(selector: &str, html: &'a T) -> Result<Selectable<'a, T>> {
48 Ok(Self {
49 selector: scraper::Selector::parse(selector)?,
50 node: html,
51 })
52 }
53}
54
55pub type SelectableHtml<'a> = Selectable<'a, scraper::Html>;
57
58impl<'a> Selectable<'a, scraper::Html> {
59 pub fn iter(&self) -> HtmlSelectIterator {
61 HtmlSelectIterator {
62 select: self.node.select(&self.selector),
63 }
64 }
65
66 pub fn first(&self) -> Option<SelectItem> {
68 self.iter().next()
69 }
70}
71
72pub type SelectableElement<'a> = Selectable<'a, ElementRef<'a>>;
74
75impl<'a> Selectable<'a, ElementRef<'a>> {
76 pub fn iter(&self) -> ElementSelectIterator {
78 ElementSelectIterator {
79 select: self.node.select(&self.selector),
80 }
81 }
82
83 pub fn first(&self) -> Option<SelectItem> {
85 self.iter().next()
86 }
87}
88
89impl<'a, 'b> Iterator for HtmlSelectIterator<'a, 'b> {
90 type Item = SelectItem<'a>;
91
92 fn next(&mut self) -> Option<Self::Item> {
93 self.select.next().map(Self::Item::new)
94 }
95}
96
97impl<'a, 'b> Iterator for ElementSelectIterator<'a, 'b> {
98 type Item = SelectItem<'a>;
99
100 fn next(&mut self) -> Option<Self::Item> {
101 self.select.next().map(Self::Item::new)
102 }
103}
104
105pub type CaseSensitivity = scraper::CaseSensitivity;
107pub type Classes<'a> = scraper::node::Classes<'a>;
109pub type Attrs<'a> = scraper::node::Attrs<'a>;
111
112impl<'a> SelectItem<'a> {
113 pub fn new(element: ElementRef<'a>) -> Self {
115 Self { element }
116 }
117
118 pub fn name(&self) -> &str {
120 self.element.value().name()
121 }
122
123 pub fn id(&self) -> Option<&str> {
125 self.element.value().id()
126 }
127
128 pub fn has_class(&self, class: &str, case_sensitive: CaseSensitivity) -> bool {
130 self.element.value().has_class(class, case_sensitive)
131 }
132
133 pub fn classes(&self) -> Classes {
135 self.element.value().classes()
136 }
137
138 pub fn attrs(&self) -> Attrs {
140 self.element.value().attrs()
141 }
142
143 pub fn attr(&self, attr: &str) -> Option<&'a str> {
145 self.element.attr(attr)
146 }
147
148 pub fn text(&self) -> String {
150 self.element.text().join("").trim().into()
151 }
152
153 pub fn html(&self) -> String {
155 self.element.html()
156 }
157
158 pub fn inner_html(&self) -> String {
160 self.element.inner_html()
161 }
162
163 pub fn children(&self) -> impl Iterator<Item = SelectItem<'a>> {
165 self.element.child_elements().map(SelectItem::new)
166 }
167
168 pub fn select(&self, selector: &str) -> Result<Selectable<'a, ElementRef>> {
170 Selectable::wrap(selector, &self.element)
171 }
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Selects an element by id, then runs nested selectors on it and checks
    /// the attribute and text accessors.
    #[test]
    fn test_css_selector() {
        let html_str = r#"
            <html>
                <body>
                    <div id="content">
                        <p>Hello, World!</p>
                        <p>This is a test.</p>
                    </div>
                </body>
            </html>
        "#;

        let html = Html::new(html_str);

        // Each `Selectable` is kept in its own binding so that it outlives the
        // item obtained from it.
        let content_query = html.select("#content").unwrap();
        let content = content_query.first().expect("`#content` should match");
        assert_eq!(content.attr("id"), Some("content"));

        let p1_query = content.select("p:nth-child(1)").unwrap();
        let p1 = p1_query.first().expect("first paragraph should match");
        assert_eq!(p1.text(), "Hello, World!");

        let p2_query = content.select("p:nth-child(2)").unwrap();
        let p2 = p2_query.first().expect("second paragraph should match");
        assert_eq!(p2.text(), "This is a test.");
    }
}