use crate::error::Error;
use crate::page_archive::PageArchive;
use crate::parsing::{
mimetype_from_response, parse_resource_urls, ImageResource, Resource,
ResourceMap, ResourceUrl,
};
use crate::ArchiveOptions;
use reqwest::StatusCode;
use std::convert::TryInto;
use std::fmt::Display;
use url::Url;
pub fn archive<U>(url: U, options: ArchiveOptions) -> Result<PageArchive, Error>
where
U: TryInto<Url>,
<U as TryInto<Url>>::Error: Display,
{
let url: Url = url
.try_into()
.map_err(|e| Error::ParseError(format!("{}", e)))?;
let client = reqwest::blocking::Client::builder()
.use_native_tls()
.danger_accept_invalid_certs(options.accept_invalid_certificates)
.danger_accept_invalid_hostnames(options.accept_invalid_certificates)
.build()?;
let content = client.get(url.clone()).send()?.text()?;
let resource_urls = parse_resource_urls(&url, &content);
let mut resource_map = ResourceMap::new();
for resource_url in resource_urls {
use ResourceUrl::*;
let response = client.get(resource_url.url().clone()).send()?;
if response.status() != StatusCode::OK {
println!("Code: {}", response.status());
continue;
}
match resource_url {
Image(u) => {
let data = response.bytes()?;
let mimetype = mimetype_from_response(&data, &u);
resource_map.insert(
u,
Resource::Image(ImageResource { data, mimetype }),
);
}
Css(u) => {
resource_map.insert(u, Resource::Css(response.text()?));
}
Javascript(u) => {
resource_map.insert(u, Resource::Javascript(response.text()?));
}
}
}
Ok(PageArchive {
url,
content,
resource_map,
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A string that is not a valid URL must be rejected with
    /// `Error::ParseError`.
    #[test]
    fn parse_invalid_url_blocking() {
        let outcome = archive("this~is~not~a~url", Default::default());

        assert!(outcome.is_err());
        match outcome {
            Err(Error::ParseError(_)) => {}
            _ => panic!("Expected parse error"),
        }
    }
}