1use crate::error::Error;
26use crate::page_archive::PageArchive;
27use crate::parsing::{
28 mimetype_from_response, parse_resource_urls, ImageResource, Resource,
29 ResourceMap, ResourceUrl,
30};
31use crate::ArchiveOptions;
32use reqwest::StatusCode;
33use std::convert::TryInto;
34use std::fmt::Display;
35use url::Url;
36
37pub fn archive<U>(url: U, options: ArchiveOptions) -> Result<PageArchive, Error>
43where
44 U: TryInto<Url>,
45 <U as TryInto<Url>>::Error: Display,
46{
47 let url: Url = url
48 .try_into()
49 .map_err(|e| Error::ParseError(format!("{}", e)))?;
50
51 let client = reqwest::blocking::Client::builder()
53 .use_native_tls()
54 .danger_accept_invalid_certs(options.accept_invalid_certificates)
55 .danger_accept_invalid_hostnames(options.accept_invalid_certificates)
56 .build()?;
57
58 let content = client.get(url.clone()).send()?.text()?;
60
61 let resource_urls = parse_resource_urls(&url, &content);
63 let mut resource_map = ResourceMap::new();
64
65 for resource_url in resource_urls {
67 use ResourceUrl::*;
68
69 let response = client.get(resource_url.url().clone()).send()?;
70 if response.status() != StatusCode::OK {
71 println!("Code: {}", response.status());
73 continue;
74 }
75 match resource_url {
76 Image(u) => {
77 let data = response.bytes()?;
78 let mimetype = mimetype_from_response(&data, &u);
79 resource_map.insert(
80 u,
81 Resource::Image(ImageResource { data, mimetype }),
82 );
83 }
84 Css(u) => {
85 resource_map.insert(u, Resource::Css(response.text()?));
86 }
87 Javascript(u) => {
88 resource_map.insert(u, Resource::Javascript(response.text()?));
89 }
90 }
91 }
92
93 Ok(PageArchive {
94 url,
95 content,
96 resource_map,
97 })
98}
99
#[cfg(test)]
mod tests {
    use super::*;

    /// A string that cannot be converted into a `Url` must make `archive`
    /// return `Error::ParseError`.
    #[test]
    fn parse_invalid_url_blocking() {
        let result = archive("this~is~not~a~url", Default::default());
        assert!(result.is_err());

        // Only the parse-error variant is acceptable; any other outcome
        // fails the test.
        match result {
            Err(Error::ParseError(_)) => {}
            _ => panic!("Expected parse error"),
        }
    }
}