1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
extern crate html5ever;
extern crate curl;
extern crate serde_json;
mod http;
pub mod html;
mod opengraph;
mod parser;
mod schema_org;
use http::HTTP;
use html::HTML;
use std::io;
use std::str;
use std::time::Duration;
/// The result of fetching and parsing a single page.
///
/// Built by `Webpage::from_url`, which stores the value returned by
/// `HTTP::fetch` alongside the value returned by `HTML::from_string`.
pub struct Webpage {
    /// Outcome of the HTTP fetch step (exposes at least `body` and `url`).
    pub http: HTTP,
    /// Outcome of parsing the fetched body as HTML.
    pub html: HTML,
}
/// Configuration handed to `Webpage::from_url`, which forwards it unchanged
/// to `HTTP::fetch`.
///
/// `Debug` and `Clone` are derived so that callers can log a configuration
/// and reuse it for several fetches (`from_url` takes the options by value).
///
/// The exact effect of each field is decided by the `http` module, which is
/// not part of this file; the per-field notes below are therefore hedged.
#[derive(Debug, Clone)]
pub struct WebpageOptions {
    /// NOTE(review): presumably relaxes TLS certificate checks when `true`
    /// -- confirm against `HTTP::fetch`.
    pub allow_insecure: bool,
    /// NOTE(review): presumably controls whether HTTP redirects are followed
    /// -- confirm against `HTTP::fetch`.
    pub follow_location: bool,
    /// NOTE(review): presumably the upper bound on followed redirects
    /// -- confirm against `HTTP::fetch`.
    pub max_redirections: u32,
    /// NOTE(review): presumably the time limit applied to the request
    /// -- confirm against `HTTP::fetch`.
    pub timeout: Duration,
    /// NOTE(review): presumably sent as the `User-Agent` request header
    /// -- confirm against `HTTP::fetch`.
    pub useragent: String,
}
impl Default for WebpageOptions {
fn default() -> Self {
Self {
allow_insecure: false,
follow_location: true,
max_redirections: 5,
timeout: Duration::from_secs(10),
useragent: "Webpage - Rust crate - https://crates.io/crates/webpage".to_string(),
}
}
}
impl Webpage {
pub fn from_url(url: &str, options: WebpageOptions) -> Result<Self, io::Error> {
let http = HTTP::fetch(url, options)?;
let html = HTML::from_string(
http.body.clone(),
Some(http.url.clone())
)?;
Ok(Self {
http,
html,
})
}
}