http_downloader/
downloader_builder.rs

1use std::borrow::Cow;
2use std::num::{NonZeroU8, NonZeroUsize};
3use std::path::PathBuf;
4use std::sync::Arc;
5use std::time::Duration;
6
7use headers::{ETag, HeaderMap, HeaderMapExt};
8use tokio_util::sync::CancellationToken;
9use url::Url;
10
11use crate::{DownloadExtensionBuilder, ExtendedHttpFileDownloader, HttpFileDownloader};
12
/// Strategy applied when the server answers with an HTTP redirection.
///
/// The enum only carries plain data, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HttpRedirectionHandle {
    /// NOTE(review): presumably a redirection response is treated as an invalid
    /// response and not followed — confirm against the download logic.
    Invalid,
    /// NOTE(review): presumably the new location is requested, following at most
    /// `max_times` redirections — confirm against the download logic.
    RequestNewLocation {
        /// Upper bound associated with this strategy (the builder default is `8`).
        max_times: usize,
    },
}
20
21pub struct HttpDownloadConfig {
22    // 提前设置长度,如果存储空间不足将提前报错
23    pub set_len_in_advance: bool,
24    pub download_connection_count: NonZeroU8,
25    pub chunk_size: NonZeroUsize,
26    pub chunks_send_interval: Option<Duration>,
27    pub save_dir: PathBuf,
28    pub file_name: String,
29    pub open_option: Box<dyn Fn(&mut std::fs::OpenOptions) + Send + Sync + 'static>,
30    pub create_dir: bool,
31    pub url: Arc<Url>,
32    pub etag: Option<ETag>,
33    pub request_retry_count: u8,
34    // pub timeout: Option<Duration>,
35    pub header_map: HeaderMap,
36    pub downloaded_len_send_interval: Option<Duration>,
37    pub strict_check_accept_ranges: bool,
38    pub http_request_configure: Option<Box<dyn Fn(reqwest::Request) -> reqwest::Request + Send + Sync + 'static>>,
39    pub cancel_token: Option<CancellationToken>,
40    pub handle_redirection: HttpRedirectionHandle,
41    pub use_browser_user_agent: bool,
42}
43
44impl HttpDownloadConfig {
45    /// 下载文件路径
46    pub fn file_path(&self) -> PathBuf {
47        self.save_dir.join(&self.file_name)
48    }
49
50    pub(crate) fn create_http_request(&self, redirection_location: Option<&str>) -> reqwest::Request {
51        let mut url = (*self.url).clone().clone();
52        if let Some(location) = redirection_location {
53            url.set_path(location);
54        }
55        let mut request = reqwest::Request::new(reqwest::Method::GET, url);
56        let header_map = request.headers_mut();
57        if self.use_browser_user_agent {
58            header_map.insert(reqwest::header::USER_AGENT, headers::HeaderValue::from_str("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48").unwrap());
59        }
60        header_map.insert(reqwest::header::ACCEPT, headers::HeaderValue::from_str("*/*").unwrap());
61        header_map.typed_insert(headers::Connection::keep_alive());
62        for (header_name, header_value) in self.header_map.iter() {
63            header_map.insert(header_name, header_value.clone());
64        }
65        // 限速后超时会出现异常?
66        *request.timeout_mut() = None;
67        // *request.timeout_mut() = self.config.timeout;
68        match self.http_request_configure.as_ref() {
69            None => { request }
70            Some(configure) => {
71                configure(request)
72            }
73        }
74    }
75}
76
77pub struct HttpDownloaderBuilder {
78    chunk_size: NonZeroUsize,
79    download_connection_count: NonZeroU8,
80    url: Url,
81    save_dir: PathBuf,
82    set_len_in_advance: bool,
83    file_name: Option<String>,
84    open_option: Box<dyn Fn(&mut std::fs::OpenOptions) + Send + Sync + 'static>,
85    create_dir: bool,
86    request_retry_count: u8,
87    // timeout: Option<Duration>,
88    etag: Option<ETag>,
89    client: Option<reqwest::Client>,
90    header_map: HeaderMap,
91    downloaded_len_send_interval: Option<Duration>,
92    chunks_send_interval: Option<Duration>,
93    strict_check_accept_ranges: bool,
94    http_request_configure: Option<Box<dyn Fn(reqwest::Request) -> reqwest::Request + Send + Sync + 'static>>,
95    cancel_token: Option<CancellationToken>,
96    handle_redirection: HttpRedirectionHandle,
97    use_browser_user_agent: bool,
98}
99
100impl HttpDownloaderBuilder {
101    pub fn new(url: Url, save_dir: PathBuf) -> Self {
102        Self {
103            client: None,
104            chunk_size: NonZeroUsize::new(1024 * 1024 * 4).unwrap(), // 4M,
105            file_name: None,
106            open_option: Box::new(|o| {
107                o.create(true).write(true);
108            }),
109            create_dir: true,
110            request_retry_count: 3,
111            download_connection_count: NonZeroU8::new(3).unwrap(),
112            url,
113            save_dir,
114            etag: None,
115            // timeout: None,
116            header_map: Default::default(),
117            downloaded_len_send_interval: Some(Duration::from_millis(300)),
118            chunks_send_interval: Some(Duration::from_millis(300)),
119            strict_check_accept_ranges: true,
120            http_request_configure: None,
121            set_len_in_advance: false,
122            cancel_token: None,
123            handle_redirection: HttpRedirectionHandle::RequestNewLocation {
124                max_times: 8
125            },
126            use_browser_user_agent: true,
127        }
128    }
129
130    pub fn client(mut self, client: Option<reqwest::Client>) -> Self {
131        self.client = client;
132        self
133    }
134
135    /// 当目录不存在时,是否创建它
136    pub fn create_dir(mut self, create_dir: bool) -> Self {
137        self.create_dir = create_dir;
138        self
139    }
140
141    pub fn cancel_token(mut self, cancel_token: Option<CancellationToken>) -> Self {
142        self.cancel_token = cancel_token;
143        self
144    }
145
146    pub fn handle_redirection(mut self, http_redirection_handle: HttpRedirectionHandle) -> Self {
147        self.handle_redirection = http_redirection_handle;
148        self
149    }
150
151    /// 是否提前设置文件长度
152    pub fn set_len_in_advance(mut self, set_len_in_advance: bool) -> Self {
153        self.set_len_in_advance = set_len_in_advance;
154        self
155    }
156
157    /// HTTP 请求重试次数
158    pub fn request_retry_count(mut self, request_retry_count: u8) -> Self {
159        self.request_retry_count = request_retry_count;
160        self
161    }
162
163    /// 请求头自定义
164    pub fn header_map(mut self, header_map: HeaderMap) -> Self {
165        self.header_map = header_map;
166        self
167    }
168    /// 使用浏览器 User Agent
169    pub fn use_browser_user_agent(mut self, use_browser_user_agent: bool) -> Self {
170        self.use_browser_user_agent = use_browser_user_agent;
171        self
172    }
173
174    /// 下载长度发送间隔
175    pub fn downloaded_len_send_interval(
176        mut self,
177        downloaded_len_send_interval: Option<Duration>,
178    ) -> Self {
179        self.downloaded_len_send_interval = downloaded_len_send_interval;
180        self
181    }
182
183    /// chunks 发送间隔
184    pub fn chunks_send_interval(mut self, chunks_send_interval: Option<Duration>) -> Self {
185        self.chunks_send_interval = chunks_send_interval;
186        self
187    }
188
189    /*
190    pub fn timeout(mut self, timeout: Option<Duration>) -> Self {
191        self.timeout = timeout;
192        self
193    }*/
194
195    /// 文件名称
196    pub fn file_name(mut self, file_name: Option<String>) -> Self {
197        self.file_name = file_name;
198        self
199    }
200
201    /// chunk 大小
202    pub fn chunk_size(mut self, chunk_size: NonZeroUsize) -> Self {
203        self.chunk_size = chunk_size;
204        self
205    }
206
207    /// HTTP Etag 校验
208    pub fn etag(mut self, etag: Option<ETag>) -> Self {
209        self.etag = etag;
210        self
211    }
212
213    /// 是否严格检测 Accept-Ranges 响应头
214    pub fn strict_check_accept_ranges(mut self, strict_check_accept_ranges: bool) -> Self {
215        self.strict_check_accept_ranges = strict_check_accept_ranges;
216        self
217    }
218
219    /// 下载连接数
220    pub fn download_connection_count(mut self, download_connection_count: NonZeroU8) -> Self {
221        self.download_connection_count = download_connection_count;
222        self
223    }
224
225    /// reqwest::Request 配置方法
226    pub fn http_request_configure(mut self, http_request_configure: impl Fn(reqwest::Request) -> reqwest::Request + Send + Sync + 'static) -> Self {
227        self.http_request_configure = Some(Box::new(http_request_configure));
228        self
229    }
230
231    /// 构建 `ExtendedHttpFileDownloader`
232    /// 参数为需要开启的扩展,多个扩展用元组来表示,如果不需要扩展可以传入`()`空元组
233    pub fn build<
234        DEB: DownloadExtensionBuilder,
235    >(
236        self,
237        extension_builder: DEB,
238    ) -> (ExtendedHttpFileDownloader, DEB::ExtensionState) {
239        let mut downloader = HttpFileDownloader::new(
240            self.client.unwrap_or(Default::default()),
241            Arc::new(HttpDownloadConfig {
242                set_len_in_advance: self.set_len_in_advance,
243                download_connection_count: self.download_connection_count,
244                chunk_size: self.chunk_size,
245                file_name: self
246                    .file_name
247                    .unwrap_or_else(|| self.url.file_name().to_string()),
248                open_option: self.open_option,
249                create_dir: self.create_dir,
250                url: Arc::new(self.url),
251                save_dir: self.save_dir,
252                etag: self.etag,
253                request_retry_count: self.request_retry_count,
254                // timeout: self.timeout,
255                header_map: self.header_map,
256                downloaded_len_send_interval: self.downloaded_len_send_interval,
257                chunks_send_interval: self.chunks_send_interval,
258                strict_check_accept_ranges: self.strict_check_accept_ranges,
259                http_request_configure: self.http_request_configure,
260                cancel_token: self.cancel_token,
261                handle_redirection: self.handle_redirection,
262                use_browser_user_agent: true,
263            }),
264        );
265        let (extension, es) = extension_builder.build(&mut downloader);
266        (ExtendedHttpFileDownloader::new(downloader, Box::new(extension)), es)
267    }
268}
269
/// Derives a file name from a URL-like value.
pub trait UrlFileName {
    /// Returns the file name to use for `self`; the result may borrow from `self`.
    fn file_name(&self) -> Cow<'_, str>;
}
273
274impl UrlFileName for Url {
275    fn file_name(&self) -> Cow<str> {
276        let website_default: &'static str = "index.html";
277        self.path_segments()
278            .map(|n| {
279                n.last()
280                    .map(|n| Cow::Borrowed(if n.is_empty() { website_default } else { n }))
281                    .unwrap_or_else(|| {
282                        self.domain()
283                            .map(Cow::Borrowed)
284                            .unwrap_or(Cow::Owned(website_default.to_string()))
285                    })
286            })
287            .unwrap_or_else(|| Cow::Owned(website_default.to_string()))
288    }
289}