1extern crate reqwest;
15extern crate scraper;
16
17use reqwest::{Client, Url};
18use scraper::{Html, html::Select, Selector};
19use std::fmt;
20
21pub struct UrlScraper {
23 url: Url,
24 html: Html,
25 selector: Selector,
26}
27
28impl UrlScraper {
29 pub fn new(url: &str) -> Result<Self, Error> {
31 let client = Client::new();
32 Self::new_with_client(url, &client)
33 }
34
35 pub fn new_with_client(url: &str, client: &Client) -> Result<Self, Error> {
37 let url = Url::parse(url)?;
38 let mut resp = client.get(url.clone()).send()?;
39 let html = resp.text()?;
40
41 Ok(Self {
42 url,
43 html: Html::parse_document(&html),
44 selector: Selector::parse("a").expect("failed to create <a> selector"),
45 })
46 }
47
48 pub fn new_with_html(url: &str, html: &str) -> Result<Self, Error> {
50 Ok(Self {
51 url: Url::parse(url)?,
52 html: Html::parse_document(html),
53 selector: Selector::parse("a").expect("failed to create <a> selector"),
54 })
55 }
56
57 pub fn into_iter<'a>(&'a self) -> UrlIter<'a, 'a> {
59 UrlIter {
60 url: &self.url,
61 data: self.html.select(&self.selector)
62 }
63 }
64}
65
66pub struct UrlIter<'a, 'b> {
68 url: &'a Url,
69 data: Select<'a, 'b>
70}
71
72impl<'a, 'b> Iterator for UrlIter<'a, 'b> {
73 type Item = (String, Url);
74
75 fn next(&mut self) -> Option<Self::Item> {
76 for element in &mut self.data {
77 if let Some(url) = element.value().attr("href") {
78 if ! url.starts_with('?') {
79 if let Ok(url) = self.url.join(url) {
80 return Some((element.inner_html(), url));
81 }
82 }
83 }
84 }
85
86 None
87 }
88}
89
90#[derive(Debug)]
91pub enum Error {
92 UrlParsing { why: reqwest::UrlError },
93 Request { why: reqwest::Error }
94}
95
96impl fmt::Display for Error {
97 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
98 let error = match *self {
99 Error::UrlParsing { ref why } => format!("failed to parse URL: {}", why),
100 Error::Request { ref why } => format!("failure in request: {}", why),
101 };
102 f.write_str(&error)
103 }
104}
105
106impl From<reqwest::UrlError> for Error {
107 fn from(why: reqwest::UrlError) -> Error {
108 Error::UrlParsing { why }
109 }
110}
111
112impl From<reqwest::Error> for Error {
113 fn from(why: reqwest::Error) -> Error {
114 Error::Request { why }
115 }
116}