#![warn(missing_docs)]
#![forbid(unsafe_code)]
pub use error::Error;
pub use page_archive::PageArchive;
use parsing::{mimetype_from_response, parse_resource_urls};
pub use parsing::{ImageResource, Resource, ResourceMap, ResourceUrl};
use reqwest::StatusCode;
use std::convert::TryInto;
use std::fmt::Display;
use url::Url;
/// Home of the crate's [`Error`] type, which is re-exported at the crate root.
pub mod error;
/// Home of [`PageArchive`], the value produced by [`archive`].
pub mod page_archive;
/// Resource URL extraction and the resource types re-exported at the crate
/// root ([`ImageResource`], [`Resource`], [`ResourceMap`], [`ResourceUrl`]).
pub mod parsing;
/// Only compiled when the `blocking` cargo feature is enabled.
// NOTE(review): presumably a synchronous counterpart to the async API in this
// file - confirm against the module's contents.
#[cfg(feature = "blocking")]
pub mod blocking;
pub async fn archive<U>(
url: U,
options: ArchiveOptions,
) -> Result<PageArchive, Error>
where
U: TryInto<Url>,
<U as TryInto<Url>>::Error: Display,
{
let url: Url = url
.try_into()
.map_err(|e| Error::ParseError(format!("{}", e)))?;
let client = reqwest::Client::builder()
.use_native_tls()
.danger_accept_invalid_certs(options.accept_invalid_certificates)
.danger_accept_invalid_hostnames(options.accept_invalid_certificates)
.build()?;
let content = client.get(url.clone()).send().await?.text().await?;
let resource_urls = parse_resource_urls(&url, &content);
let mut resource_map = ResourceMap::new();
for resource_url in resource_urls {
use ResourceUrl::*;
let response = client.get(resource_url.url().clone()).send().await?;
if response.status() != StatusCode::OK {
continue;
}
match resource_url {
Image(u) => {
let data = response.bytes().await?;
let mimetype = mimetype_from_response(&data, &u);
resource_map.insert(
u,
Resource::Image(ImageResource { data, mimetype }),
);
}
Css(u) => {
resource_map.insert(u, Resource::Css(response.text().await?));
}
Javascript(u) => {
resource_map
.insert(u, Resource::Javascript(response.text().await?));
}
}
}
Ok(PageArchive {
url,
content,
resource_map,
})
}
/// Settings that control how [`archive`] talks to remote servers.
///
/// The [`Default`] value is the strict configuration: every field is `false`.
#[derive(Debug, Clone, Default)]
pub struct ArchiveOptions {
    /// When `true`, the HTTP client is built to accept invalid TLS
    /// certificates and invalid certificate hostnames.
    ///
    /// Defaults to `false`.
    pub accept_invalid_certificates: bool,
}
#[cfg(test)]
mod tests {
    use super::*;
    use tokio_test::block_on;

    /// Input that cannot be converted into a `Url` must be reported as
    /// `Error::ParseError`.
    #[test]
    fn parse_invalid_url_async() {
        let not_a_url = "this~is~not~a~url";
        let outcome = block_on(archive(not_a_url, ArchiveOptions::default()));

        assert!(outcome.is_err());
        match outcome {
            Err(Error::ParseError(_)) => {}
            _ => panic!("Expected parse error"),
        }
    }
}