ppt_rs/web2ppt/
fetcher.rs1use super::{Web2PptError, Result, Web2PptConfig};
4use reqwest::blocking::Client;
5use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, ACCEPT_LANGUAGE, CACHE_CONTROL};
6use std::time::Duration;
7
/// Blocking HTTP client wrapper that downloads the body of a web page as text.
///
/// Instances are built through `WebFetcher::new` or `WebFetcher::with_config`
/// and keep the configuration they were created from.
pub struct WebFetcher {
    /// Blocking `reqwest` client used for every request made by this fetcher.
    client: Client,
    /// Configuration the client was built from; returned by `config()`.
    config: Web2PptConfig,
}
13
14impl WebFetcher {
15 pub fn new() -> Result<Self> {
17 Self::with_config(Web2PptConfig::default())
18 }
19
20 pub fn with_config(config: Web2PptConfig) -> Result<Self> {
22 let mut headers = HeaderMap::new();
24 headers.insert(ACCEPT, HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"));
25 headers.insert(ACCEPT_LANGUAGE, HeaderValue::from_static("en-US,en;q=0.5"));
26 headers.insert(CACHE_CONTROL, HeaderValue::from_static("no-cache"));
27
28 let client = Client::builder()
29 .user_agent(&config.user_agent)
30 .timeout(Duration::from_secs(config.timeout_secs))
31 .default_headers(headers)
32 .redirect(reqwest::redirect::Policy::limited(10))
33 .build()
34 .map_err(|e| Web2PptError::FetchError(e.to_string()))?;
35
36 Ok(WebFetcher { client, config })
37 }
38
39 pub fn fetch(&self, url: &str) -> Result<String> {
41 let parsed_url = url::Url::parse(url)
43 .map_err(|e| Web2PptError::InvalidUrl(e.to_string()))?;
44
45 if parsed_url.scheme() != "http" && parsed_url.scheme() != "https" {
47 return Err(Web2PptError::InvalidUrl(
48 "Only HTTP and HTTPS URLs are supported".to_string()
49 ));
50 }
51
52 let response = self.client
54 .get(url)
55 .header("Referer", url)
56 .send()
57 .map_err(|e| Web2PptError::FetchError(e.to_string()))?;
58
59 if !response.status().is_success() {
61 return Err(Web2PptError::FetchError(
62 format!("HTTP {}: {}", response.status().as_u16(), response.status().as_str())
63 ));
64 }
65
66 response.text()
68 .map_err(|e| Web2PptError::FetchError(e.to_string()))
69 }
70
71 pub fn fetch_with_url(&self, url: &str) -> Result<(String, String)> {
73 let html = self.fetch(url)?;
74 Ok((url.to_string(), html))
75 }
76
77 pub fn config(&self) -> &Web2PptConfig {
79 &self.config
80 }
81}
82
83impl Default for WebFetcher {
84 fn default() -> Self {
85 Self::new().expect("Failed to create default WebFetcher")
86 }
87}
88
#[cfg(test)]
mod tests {
    use super::*;

    /// A string that does not parse as a URL is rejected as `InvalidUrl`.
    ///
    /// Asserting the variant (not just `is_err`) ensures the failure comes
    /// from URL validation rather than from an unrelated request error.
    #[test]
    fn test_invalid_url() {
        let fetcher = WebFetcher::new().unwrap();
        let result = fetcher.fetch("not-a-url");
        assert!(matches!(result, Err(Web2PptError::InvalidUrl(_))));
    }

    /// A well-formed URL with a non-HTTP(S) scheme is rejected as `InvalidUrl`.
    #[test]
    fn test_invalid_scheme() {
        let fetcher = WebFetcher::new().unwrap();
        let result = fetcher.fetch("ftp://example.com");
        assert!(matches!(result, Err(Web2PptError::InvalidUrl(_))));
    }

    /// The configuration passed to `with_config` is the one `config()` returns.
    #[test]
    fn test_config() {
        let config = Web2PptConfig::new().timeout(60);
        let fetcher = WebFetcher::with_config(config).unwrap();
        assert_eq!(fetcher.config().timeout_secs, 60);
    }
}
113}