1use anyhow::Result;
2use chrono::{DateTime, Utc};
3use log::info;
4use reqwest::blocking::{Client, Response};
5use reqwest::header::{IF_MODIFIED_SINCE, LAST_MODIFIED};
6use rustls::ClientConfig;
7use rustls_platform_verifier::BuilderVerifierExt;
8use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10use std::collections::HashMap;
11use std::fs::{self, File};
12use std::io::{BufReader, Read};
13use std::path::{Path, PathBuf};
14use std::sync::{OnceLock, RwLock};
15use tempfile::NamedTempFile;
16
/// File name, inside the cache directory, of the JSON index that describes the cached downloads.
const METADATA_FILE: &str = "_metadata.json";
/// Fourteen days expressed in seconds; a cached entry is re-validated once this much time has
/// elapsed since it was last checked.
const TWO_WEEKS: u64 = 14 * 24 * 60 * 60;
19
/// Index of everything held in the cache; this is the structure serialized to the metadata file.
#[derive(Debug, Serialize, Deserialize, Default)]
struct Metadata {
    /// Cache entries keyed by the URL they were downloaded from.
    files: HashMap<String, FileEntry>,
}
24
/// Bookkeeping for a single cached download.
#[derive(Debug, Serialize, Deserialize, Clone)]
struct FileEntry {
    /// Location of the cached file on disk.
    path: PathBuf,
    /// When the entry was stored or last re-validated.
    last_checked: DateTime<Utc>,
    /// Value of the `Last-Modified` response header, if one was sent and could be parsed.
    last_modified: Option<DateTime<Utc>>,
    /// Lowercase hexadecimal SHA-256 digest of the file contents.
    content_hash: String,
}
32
/// Downloads files over HTTP and keeps them in a directory-backed cache.
pub struct Downloader {
    /// Directory that holds the cached files and the metadata file.
    cache_dir: PathBuf,
    /// Full path of the metadata file inside `cache_dir`.
    metadata_path: PathBuf,
    /// Cache index, loaded from disk the first time it is needed.
    metadata: OnceLock<RwLock<Metadata>>,
    /// HTTP client, built the first time it is needed.
    _client: OnceLock<Client>,
}
39
40impl Downloader {
41 pub fn new(cache_dir: impl AsRef<Path>) -> Self {
42 let cache_dir = cache_dir.as_ref().to_path_buf();
43 info!("Cache folder at: {cache_dir:?}");
44
45 let metadata_path = cache_dir.join(METADATA_FILE);
46
47 Self {
48 cache_dir,
49 metadata_path,
50 metadata: OnceLock::new(),
51 _client: OnceLock::new(),
52 }
53 }
54
55 fn client(&self) -> &Client {
56 self._client.get_or_init(|| {
57 let arc_crypto_provider =
59 std::sync::Arc::new(rustls::crypto::aws_lc_rs::default_provider());
60 let config = ClientConfig::builder_with_provider(arc_crypto_provider)
61 .with_safe_default_protocol_versions()
62 .unwrap()
63 .with_platform_verifier()
64 .unwrap()
65 .with_no_client_auth();
66 reqwest::blocking::Client::builder()
67 .use_preconfigured_tls(config)
68 .build()
69 .expect("Failed to build http client")
70 })
71 }
72
73 fn metadata(&self) -> &RwLock<Metadata> {
74 let metadata_path = self.metadata_path.clone();
75 let cache_dir = self.cache_dir.clone();
76 self.metadata.get_or_init(move || {
77 fs::create_dir_all(&cache_dir)
78 .expect("Failed to create cache directory: {cache_dir:?}");
79 let metadata = Self::load_metadata(&metadata_path);
80 RwLock::new(metadata)
81 })
82 }
83
84 fn load_metadata(metadata_path: &Path) -> Metadata {
85 match File::open(metadata_path) {
86 Ok(file) => match serde_json::from_reader(file) {
87 Ok(metadata) => metadata,
88 Err(err) => {
89 log::warn!("Failed to parse metadata file {metadata_path:?}: {err}");
90 Metadata::default()
91 }
92 },
93 Err(err) => {
94 if err.kind() != std::io::ErrorKind::NotFound {
95 log::warn!("Failed to open metadata file {metadata_path:?}: {err}");
96 }
97 Metadata::default()
98 }
99 }
100 }
101
102 fn persist_metadata(&self, metadata: &Metadata) -> Result<()> {
103 let file = File::create(&self.metadata_path)?;
104 serde_json::to_writer_pretty(file, metadata)?;
105 Ok(())
106 }
107
108 fn purge_stale_entry(&self, url: &str, stale_path: &Path) {
109 let mut metadata = self.metadata().write().unwrap();
110 if metadata
111 .files
112 .get(url)
113 .map(|entry| entry.path == stale_path)
114 .unwrap_or(false)
115 {
116 metadata.files.remove(url);
117 if let Err(err) = self.persist_metadata(&metadata) {
118 log::error!(
119 "Failed to persist metadata after removing stale entry for {url}: {err}"
120 );
121 }
122 }
123 }
124
125 pub fn get(&self, url: &str) -> Result<PathBuf> {
126 let entry = {
127 let metadata = self.metadata().read().unwrap();
128 metadata.files.get(url).cloned()
129 };
130
131 let result = match entry {
132 Some(entry) => {
133 if !entry.path.exists() {
134 self.purge_stale_entry(url, &entry.path);
135 self.download_new(url)
136 } else {
137 let needs_update =
138 entry.last_checked.timestamp() + TWO_WEEKS as i64 <= Utc::now().timestamp();
139 if needs_update {
140 self.try_update(url)
141 } else {
142 Ok(entry.path)
143 }
144 }
145 }
146 None => self.download_new(url),
147 };
148
149 result.or_else(|e| {
150 log::error!("Failed to update, using cached version: {e}");
151 let entry = {
152 let metadata = self.metadata().read().unwrap();
153 metadata
154 .files
155 .get(url)
156 .map(|file_info| file_info.path.clone())
157 };
158 match entry {
159 Some(path) => {
160 if path.exists() {
161 Ok(path)
162 } else {
163 self.purge_stale_entry(url, &path);
164 self.download_new(url)
166 }
167 }
168 None => {
169 log::warn!("URL not found in cache, attempting fresh download: {url}");
171 self.download_new(url)
172 }
173 }
174 })
175 }
176
177 fn try_update(&self, url: &str) -> Result<PathBuf> {
178 let last_modified = {
180 self.metadata()
181 .read()
182 .unwrap()
183 .files
184 .get(url)
185 .and_then(|e| e.last_modified)
186 };
187 let mut request = self.client().get(url);
191 if let Some(lm) = last_modified {
192 request = request.header(IF_MODIFIED_SINCE, lm.with_timezone(&Utc).to_rfc2822());
193 }
194 let response = request.send()?;
197 match response.status().as_u16() {
200 304 => self.update_check_time(url),
201 200 => self.handle_updated_response(url, response),
202 status => {
203 let _ = self.update_check_time(url);
204 Err(anyhow::anyhow!("Unexpected status code: {}", status))
205 }
206 }
207 }
208
209 fn handle_updated_response(&self, url: &str, response: Response) -> Result<PathBuf> {
210 let last_modified = parse_last_modified(&response);
211 let temp_file = self.download_to_temp(response)?;
212 let new_hash = compute_file_hash(temp_file.path())?;
213 let old_hash = {
214 let metadata = self
215 .metadata()
216 .read()
217 .map_err(|e| anyhow::anyhow!("Lock error: {}", e))?;
218 metadata.files.get(url).unwrap().content_hash.clone()
219 };
220 if new_hash == old_hash {
221 self.update_check_time(url)
222 } else {
223 self.replace_file(url, temp_file, last_modified, new_hash)
224 }
225 }
226
227 fn download_new(&self, url: &str) -> Result<PathBuf> {
228 let response = self.client().get(url).send()?;
229 let last_modified = parse_last_modified(&response);
230 let temp_file = self.download_to_temp(response)?;
231 let new_hash = compute_file_hash(temp_file.path())?;
232 self.store_new_file(url, temp_file, last_modified, new_hash)
233 }
234
235 fn download_to_temp(&self, mut response: Response) -> Result<NamedTempFile> {
236 let mut temp_file = NamedTempFile::new_in(&self.cache_dir)?;
237 std::io::copy(&mut response, &mut temp_file)?;
238 Ok(temp_file)
239 }
240
241 fn store_new_file(
242 &self,
243 url: &str,
244 temp_file: NamedTempFile,
245 last_modified: Option<DateTime<Utc>>,
246 content_hash: String,
247 ) -> Result<PathBuf> {
248 let filename = hash_url(url);
249 let path = self.cache_dir.join(filename);
250 temp_file.persist(&path)?;
251
252 let entry = FileEntry {
253 path: path.clone(),
254 last_checked: Utc::now(),
255 last_modified,
256 content_hash,
257 };
258 {
259 let mut metadata = self.metadata().write().unwrap();
260 metadata.files.insert(url.to_string(), entry);
261 self.persist_metadata(&metadata)?;
262 }
263 Ok(path)
264 }
265
266 fn replace_file(
267 &self,
268 url: &str,
269 temp_file: NamedTempFile,
270 last_modified: Option<DateTime<Utc>>,
271 content_hash: String,
272 ) -> Result<PathBuf> {
273 let new_path: PathBuf;
274 {
275 let mut metadata = self.metadata().write().unwrap();
276 let entry = metadata.files.get_mut(url).unwrap();
277 let old_path = entry.path.clone();
278
279 new_path = self.cache_dir.join(hash_url(url));
280 temp_file.persist(&new_path)?;
281
282 if old_path != new_path && old_path.exists() {
284 fs::remove_file(old_path)?;
285 }
286
287 entry.path = new_path.clone();
288 entry.last_checked = Utc::now();
289 entry.last_modified = last_modified;
290 entry.content_hash = content_hash;
291 self.persist_metadata(&metadata)?;
292 }
293
294 Ok(new_path)
295 }
296
297 fn update_check_time(&self, url: &str) -> Result<PathBuf> {
298 let path: PathBuf;
299 {
300 let mut metadata = self.metadata().write().unwrap();
301 let entry = metadata.files.get_mut(url).unwrap();
302 entry.last_checked = Utc::now();
303 path = entry.path.clone();
304 self.persist_metadata(&metadata)?;
305 }
306 Ok(path)
307 }
308}
309
310fn hash_url(url: &str) -> String {
311 let mut hasher = Sha256::new();
312 hasher.update(url.as_bytes());
313 format!("{:x}", hasher.finalize())
314}
315
316fn compute_file_hash(path: &Path) -> Result<String> {
317 let file = File::open(path)?;
318 let mut reader = BufReader::new(file);
319 let mut hasher = Sha256::new();
320 let mut buffer = [0; 1024];
321
322 loop {
323 let count = reader.read(&mut buffer)?;
324 if count == 0 {
325 break;
326 }
327 hasher.update(&buffer[..count]);
328 }
329
330 Ok(format!("{:x}", hasher.finalize()))
331}
332
333fn parse_last_modified(response: &Response) -> Option<DateTime<Utc>> {
334 response
335 .headers()
336 .get(LAST_MODIFIED)
337 .and_then(|hv| hv.to_str().ok())
338 .and_then(|s| DateTime::parse_from_rfc2822(s).ok())
339 .map(|dt| dt.with_timezone(&Utc))
340}
341
342#[cfg(test)]
343mod tests {
344 use super::*;
345 use chrono::Duration;
346 use httpmock::MockServer;
347 use tempfile::tempdir;
348
349 #[test]
350 fn test_download_new_file() {
351 let server = MockServer::start();
352 let mock = server.mock(|when, then| {
353 when.method("GET").path("/test.txt");
354 then.status(200)
355 .body("test content")
356 .header("Last-Modified", "Wed, 21 Oct 2023 07:28:00 GMT");
357 });
358
359 let temp_dir = tempdir().unwrap();
360 let downloader = Downloader::new(temp_dir.path());
361 let path = downloader.get(&server.url("/test.txt")).unwrap();
362
363 mock.assert();
364 assert!(path.exists());
365 assert_eq!(std::fs::read_to_string(path).unwrap(), "test content");
366 let metadata = downloader.metadata().read().unwrap();
367 let entry = metadata.files.get(&server.url("/test.txt")).unwrap();
368 assert_eq!(entry.content_hash, compute_file_hash(&entry.path).unwrap());
369 }
370
371 #[test]
372 fn test_returns_cached_file_when_offline() {
373 let server = MockServer::start();
374 let temp_dir = tempdir().unwrap();
375
376 let mock = server.mock(|when, then| {
378 when.method("GET").path("/test.txt");
379 then.status(200).body("cached content");
380 });
381
382 let downloader = Downloader::new(temp_dir.path());
383 let path = downloader.get(&server.url("/test.txt")).unwrap();
384 mock.assert();
385
386 let downloader = Downloader::new(temp_dir.path());
388 let cached_path = downloader.get(&server.url("/test.txt")).unwrap();
390
391 assert_eq!(path, cached_path);
392 assert_eq!(
393 std::fs::read_to_string(cached_path).unwrap(),
394 "cached content"
395 );
396 }
397
398 #[test]
399 fn test_updates_file_when_modified() {
400 let server = MockServer::start();
401 let temp_dir = tempdir().unwrap();
402 let test_path = server.url("/test.txt");
403
404 let initial_last_modified = "Wed, 21 Oct 2020 07:28:00 GMT";
406 let mut mock1 = server.mock(|when, then| {
407 when.method("GET").path("/test.txt");
408 then.status(200)
409 .body("v1")
410 .header("Last-Modified", initial_last_modified);
411 });
412
413 let downloader = Downloader::new(temp_dir.path());
414 let path_v1 = downloader.get(&test_path).unwrap();
415 mock1.assert();
416 mock1.delete();
417
418 let stored_last_modified = {
420 let metadata = downloader.metadata().read().unwrap();
421 metadata.files[&test_path].last_modified
422 };
423
424 {
426 let mut metadata = downloader.metadata().write().unwrap();
427 let entry = metadata.files.get_mut(&test_path).unwrap();
428 entry.last_checked = stored_last_modified.unwrap() - Duration::weeks(3);
429 }
430
431 let mock2 = server.mock(|when, then| {
433 when.method("GET").path("/test.txt").header(
434 IF_MODIFIED_SINCE.as_str(),
435 stored_last_modified.unwrap().to_rfc2822(),
436 );
437 then.status(200)
438 .body("v2")
439 .header("Last-Modified", "Fri, 23 Oct 2020 07:28:00 GMT");
440 });
441
442 let path_v2 = downloader.get(&test_path).unwrap();
443 println!("path: {path_v2:?}");
444 mock2.assert();
445
446 assert_eq!(path_v1, path_v2);
447 assert_eq!(std::fs::read_to_string(path_v2).unwrap(), "v2");
448 }
449 #[test]
450 fn test_uses_stale_file_when_update_fails() {
451 let server = MockServer::start();
452 let temp_dir = tempdir().unwrap();
453
454 let mock1 = server.mock(|when, then| {
456 when.method("GET").path("/test.txt");
457 then.status(200).body("original");
458 });
459
460 let downloader = Downloader::new(temp_dir.path());
461 let original_path = downloader.get(&server.url("/test.txt")).unwrap();
462 mock1.assert();
463
464 {
466 let mut metadata = downloader.metadata().try_write().unwrap();
467 if let Some(entry) = metadata.files.get_mut(&server.url("/test.txt")) {
468 entry.last_checked = Utc::now() - Duration::seconds(TWO_WEEKS as i64 * 2);
469 }
470 }
471 let cached_path = downloader.get(&server.url("/test.txt")).unwrap();
474 assert_eq!(original_path, cached_path);
475 }
476
477 #[test]
478 fn test_doesnt_check_within_two_weeks() {
479 let server = MockServer::start();
480 let temp_dir = tempdir().unwrap();
481
482 let mock = server.mock(|when, then| {
484 when.method("GET").path("/test.txt");
485 then.status(200).body("content");
486 });
487
488 let downloader = Downloader::new(temp_dir.path());
489 downloader.get(&server.url("/test.txt")).unwrap();
490 mock.assert_calls(1);
491
492 let mock2 = server.mock(|when, then| {
494 when.method("GET").path("/test.txt");
495 then.status(200);
496 });
497 downloader.get(&server.url("/test.txt")).unwrap();
498 mock2.assert_calls(0); }
500
501 #[test]
502 fn test_handles_304_not_modified() {
503 let server = MockServer::start();
504 let temp_dir = tempdir().unwrap();
505
506 let mut mock1 = server.mock(|when, then| {
508 when.method("GET").path("/test.txt");
509 then.status(200)
510 .body("content")
511 .header("Last-Modified", "Wed, 21 Oct 2020 07:28:00 GMT");
512 });
513
514 let downloader = Downloader::new(temp_dir.path());
515 let original_path = downloader.get(&server.url("/test.txt")).unwrap();
516 mock1.assert();
517 mock1.delete();
518
519 {
521 let mut metadata = downloader.metadata().write().unwrap();
522 if let Some(entry) = metadata.files.get_mut(&server.url("/test.txt")) {
523 entry.last_checked = DateTime::parse_from_rfc2822("Wed, 21 Oct 2020 07:28:00 GMT")
524 .unwrap()
525 .with_timezone(&Utc);
526 }
527 }
528
529 let mock2 = server.mock(|when, then| {
531 when.method("GET")
532 .path("/test.txt")
533 .header("If-Modified-Since", "Wed, 21 Oct 2020 07:28:00 +0000");
534 then.status(304);
535 });
536
537 let cached_path = downloader.get(&server.url("/test.txt")).unwrap();
538 mock2.assert();
539 assert_eq!(original_path, cached_path);
540 let metadata = downloader.metadata().read().unwrap();
541 let entry = metadata.files.get(&server.url("/test.txt")).unwrap();
542 assert!(entry.last_checked > Utc::now() - Duration::seconds(1));
543 }
544
545 #[test]
546 fn test_file_hashing() {
547 let url1 = "https://example.com/file1";
548 let url2 = "https://example.com/file2";
549
550 assert_ne!(hash_url(url1), hash_url(url2));
551
552 let same_url = "https://example.com/same";
553 assert_eq!(hash_url(same_url), hash_url(same_url));
554 }
555
556 #[test]
557 fn test_redownloads_when_file_missing() {
558 let server = MockServer::start();
559 let temp_dir = tempdir().unwrap();
560
561 let mut mock1 = server.mock(|when, then| {
563 when.method("GET").path("/test.txt");
564 then.status(200).body("content");
565 });
566
567 let downloader = Downloader::new(temp_dir.path());
568 let path = downloader.get(&server.url("/test.txt")).unwrap();
569 mock1.assert();
570 mock1.delete();
571
572 std::fs::remove_file(&path).unwrap();
574 assert!(!path.exists());
575
576 let mock2 = server.mock(|when, then| {
578 when.method("GET").path("/test.txt");
579 then.status(200).body("redownloaded content");
580 });
581
582 let new_path = downloader.get(&server.url("/test.txt")).unwrap();
583 mock2.assert();
584
585 assert!(new_path.exists());
586 assert_eq!(
587 std::fs::read_to_string(&new_path).unwrap(),
588 "redownloaded content"
589 );
590 }
591}