mod html;
pub use html::{Link, HTML};
#[cfg(feature = "curl")]
mod http;
#[cfg(feature = "curl")]
pub use http::HTTP;
mod opengraph;
pub use opengraph::{Opengraph, OpengraphObject};
mod schema_org;
pub use schema_org::SchemaOrg;
mod parser;
#[cfg(feature = "curl")]
use std::time::Duration;
#[cfg(feature = "serde")]
#[macro_use]
extern crate serde;
/// A fetched web page: the HTTP fetch result paired with the HTML parsed from it.
///
/// Only compiled when the `curl` feature is enabled. When the `serde` feature
/// is enabled the type additionally derives `Serialize` and `Deserialize`.
#[derive(Debug)]
#[cfg(feature = "curl")]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub struct Webpage {
/// Value returned by `HTTP::fetch` for the requested URL.
pub http: HTTP,
/// Document produced by `HTML::from_string` from the `body` and `url` of `http`.
pub html: HTML,
}
/// Options accepted by `Webpage::from_url`, which hands them to `HTTP::fetch`.
///
/// Only compiled when the `curl` feature is enabled. Because the struct is
/// `#[non_exhaustive]`, code outside this crate cannot build it with a struct
/// literal; start from `WebpageOptions::default()` and assign the fields that
/// should differ.
#[derive(Debug)]
#[cfg(feature = "curl")]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub struct WebpageOptions {
/// Defaults to `false`.
// NOTE(review): presumably permits connections that fail TLS verification —
// confirm against the `http` module.
pub allow_insecure: bool,
/// Defaults to `true`.
// NOTE(review): presumably controls whether HTTP redirects are followed —
// confirm against the `http` module.
pub follow_location: bool,
/// Defaults to `5`.
// NOTE(review): presumably the redirect limit used when `follow_location`
// is set — confirm against the `http` module.
pub max_redirections: u32,
/// Defaults to 10 seconds.
// NOTE(review): whether this bounds the connect phase, the whole transfer, or
// both is not visible here — confirm against the `http` module.
pub timeout: Duration,
/// Defaults to `"webpage-rs - https://crates.io/crates/webpage"`.
pub useragent: String,
/// Defaults to an empty list.
// NOTE(review): presumably extra request header lines, one per entry —
// confirm the expected format against the `http` module.
pub headers: Vec<String>,
}
#[cfg(feature = "curl")]
impl Default for WebpageOptions {
    /// Builds the stock option set:
    ///
    /// * `allow_insecure`: `false`
    /// * `follow_location`: `true`
    /// * `max_redirections`: `5`
    /// * `timeout`: 10 seconds
    /// * `useragent`: `"webpage-rs - https://crates.io/crates/webpage"`
    /// * `headers`: empty
    fn default() -> Self {
        let useragent = String::from("webpage-rs - https://crates.io/crates/webpage");
        let timeout = Duration::from_secs(10);

        WebpageOptions {
            useragent,
            timeout,
            headers: Vec::new(),
            max_redirections: 5,
            follow_location: true,
            allow_insecure: false,
        }
    }
}
#[cfg(feature = "curl")]
impl Webpage {
    /// Fetches `url` with the given `options` and parses the response into a [`Webpage`].
    ///
    /// The response body and the URL recorded on the fetch result are copied
    /// into the HTML parser, so the returned value keeps both the untouched
    /// fetch result (`http`) and the parsed document (`html`).
    ///
    /// # Errors
    ///
    /// Returns the `std::io::Error` propagated from `HTTP::fetch` or from
    /// `HTML::from_string`.
    pub fn from_url(url: &str, options: WebpageOptions) -> Result<Self, std::io::Error> {
        let response = HTTP::fetch(url, options)?;

        // The parser takes ownership of its inputs, while `response` is moved
        // into the returned struct below — hence the copies.
        let body = response.body.clone();
        let page_url = response.url.clone();
        let document = HTML::from_string(body, Some(page_url))?;

        Ok(Self {
            http: response,
            html: document,
        })
    }
}