1use std::fmt;
2use url::ParseError;
3use reqwest::{Url, Client};
4use scraper::{Html, Selector, html::Select};
5
6pub struct UrlScraper {
8 url: Url,
9 html: Html,
10 selector: Selector,
11}
12
13impl UrlScraper {
14 pub async fn new(url: &str) -> Result<Self, Error> {
16 let client = Client::new();
17 Self::new_with_client(url, &client).await
18 }
19
20 pub async fn new_with_client(url: &str, client: &Client) -> Result<Self, Error> {
22 let url = Url::parse(url)?;
23 let resp = client.get(url.clone()).send().await?;
24 let html = resp.text().await?;
25
26 Ok(Self {
27 url,
28 html: Html::parse_document(&html),
29 selector: Selector::parse("a").expect("failed to create <a> selector"),
30 })
31 }
32
33 pub fn new_with_html(url: &str, html: &str) -> Result<Self, Error> {
36 Ok(Self {
37 url: Url::parse(url)?,
38 html: Html::parse_document(html),
39 selector: Selector::parse("a").expect("failed to create <a> selector"),
40 })
41 }
42
43 pub fn into_iter(&self) -> UrlIter<'_, '_> {
45 UrlIter {
46 url: &self.url,
47 data: self.html.select(&self.selector)
48 }
49 }
50}
51
52pub struct UrlIter<'a, 'b> {
54 url: &'a Url,
55 data: Select<'a, 'b>
56}
57
58impl Iterator for UrlIter<'_, '_> {
59 type Item = (String, Url);
60
61 fn next(&mut self) -> Option<Self::Item> {
62 for element in &mut self.data {
63 if let Some(url) = element.value().attr("href") {
64 if !url.starts_with('?') {
65 if let Ok(url) = self.url.join(url) {
66 return Some((element.inner_html(), url));
67 }
68 }
69 }
70 }
71 None
72 }
73}
74
75#[derive(Debug)]
76pub enum Error {
77 UrlParsing { why: ParseError },
78 Request { why: reqwest::Error }
79}
80
81impl From<url::ParseError> for Error {
82 fn from(why: url::ParseError) -> Error {
83 Error::UrlParsing { why }
84 }
85}
86
87impl From<reqwest::Error> for Error {
88 fn from(why: reqwest::Error) -> Error {
89 Error::Request { why }
90 }
91}
92
93impl fmt::Display for Error {
94 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
95 let error = match *self {
96 Error::UrlParsing { ref why } => format!("failed to parse URL: {}", why),
97 Error::Request { ref why } => format!("failure in request: {}", why),
98 };
99 f.write_str(&error)
100 }
101}