1use fs2::FileExt;
2use glob::glob;
3use log::{debug, error, info, warn};
4use rand::RngExt;
5use reqwest::blocking::{Client, ClientBuilder};
6use reqwest::header::ETAG;
7use std::default::Default;
8use std::env;
9use std::fs::{self, OpenOptions};
10use std::path::{Path, PathBuf};
11use std::thread;
12use std::time::{self, Duration};
13use tempfile::NamedTempFile;
14
15use crate::archives::{extract_archive, ArchiveFormat};
16use crate::utils::hash_str;
17#[cfg(feature = "progress-bar")]
18use crate::ProgressBar;
19use crate::{meta::Meta, Error};
20
21#[derive(Debug)]
23pub struct CacheBuilder {
24 config: Config,
25}
26
27#[derive(Debug)]
28struct Config {
29 dir: Option<PathBuf>,
30 client_builder: ClientBuilder,
31 max_retries: u32,
32 max_backoff: u32,
33 freshness_lifetime: Option<u64>,
34 offline: bool,
35 #[cfg(feature = "progress-bar")]
36 progress_bar: Option<ProgressBar>,
37}
38
39impl CacheBuilder {
40 pub fn new() -> CacheBuilder {
42 CacheBuilder {
43 config: Config {
44 dir: None,
45 client_builder: ClientBuilder::new().timeout(None),
46 max_retries: 3,
47 max_backoff: 5000,
48 freshness_lifetime: None,
49 offline: false,
50 #[cfg(feature = "progress-bar")]
51 progress_bar: Some(ProgressBar::default()),
52 },
53 }
54 }
55
56 pub fn with_client_builder(client_builder: ClientBuilder) -> CacheBuilder {
58 CacheBuilder::new().client_builder(client_builder)
59 }
60
61 pub fn dir(mut self, dir: PathBuf) -> CacheBuilder {
65 self.config.dir = Some(dir);
66 self
67 }
68
69 pub fn client_builder(mut self, client_builder: ClientBuilder) -> CacheBuilder {
71 self.config.client_builder = client_builder;
72 self
73 }
74
75 pub fn timeout(mut self, timeout: Duration) -> CacheBuilder {
77 self.config.client_builder = self.config.client_builder.timeout(timeout);
78 self
79 }
80
81 pub fn connect_timeout(mut self, timeout: Duration) -> CacheBuilder {
83 self.config.client_builder = self.config.client_builder.connect_timeout(timeout);
84 self
85 }
86
87 pub fn max_retries(mut self, max_retries: u32) -> CacheBuilder {
89 self.config.max_retries = max_retries;
90 self
91 }
92
93 pub fn max_backoff(mut self, max_backoff: u32) -> CacheBuilder {
95 self.config.max_backoff = max_backoff;
96 self
97 }
98
99 pub fn freshness_lifetime(mut self, freshness_lifetime: u64) -> CacheBuilder {
102 self.config.freshness_lifetime = Some(freshness_lifetime);
103 self
104 }
105
106 pub fn offline(mut self, offline: bool) -> CacheBuilder {
113 self.config.offline = offline;
114 self
115 }
116
117 #[cfg(feature = "progress-bar")]
121 pub fn progress_bar(mut self, progress_bar: Option<ProgressBar>) -> CacheBuilder {
122 self.config.progress_bar = progress_bar;
123 self
124 }
125
126 pub fn build(self) -> Result<Cache, Error> {
128 let dir = self.config.dir.unwrap_or_else(|| {
129 if let Some(dir_str) = env::var_os("RUST_CACHED_PATH_ROOT") {
130 PathBuf::from(dir_str)
131 } else {
132 env::temp_dir().join("cache/")
133 }
134 });
135 let http_client = self.config.client_builder.build()?;
136 fs::create_dir_all(&dir)?;
137 Ok(Cache {
138 dir,
139 http_client,
140 max_retries: self.config.max_retries,
141 max_backoff: self.config.max_backoff,
142 freshness_lifetime: self.config.freshness_lifetime,
143 offline: self.config.offline,
144 #[cfg(feature = "progress-bar")]
145 progress_bar: self.config.progress_bar,
146 })
147 }
148}
149
150impl Default for CacheBuilder {
151 fn default() -> Self {
152 Self::new()
153 }
154}
155
/// Per-call options for resolving a resource through the cache.
///
/// The default value has no subdirectory and both flags switched off.
#[derive(Debug, Clone, Default)]
pub struct Options {
    /// Optional subdirectory of the cache root to store the resource in.
    pub subdir: Option<String>,
    /// Treat the resource as an archive and extract it.
    pub extract: bool,
    /// Force a fresh download of a remote resource even if it is cached.
    pub force: bool,
}

impl Options {
    /// Creates options from explicit values for every field.
    pub fn new(subdir: Option<&str>, extract: bool, force: bool) -> Self {
        Self {
            subdir: subdir.map(String::from),
            extract,
            force,
        }
    }

    /// Returns the options with the cache subdirectory set to `subdir`.
    #[must_use]
    pub fn subdir(mut self, subdir: &str) -> Self {
        self.subdir = Some(subdir.into());
        self
    }

    /// Returns the options with archive extraction enabled.
    #[must_use]
    pub fn extract(mut self) -> Self {
        self.extract = true;
        self
    }

    /// Returns the options with forced re-download enabled.
    #[must_use]
    pub fn force(mut self) -> Self {
        self.force = true;
        self
    }
}
194
195#[derive(Debug, Clone)]
197pub struct Cache {
198 pub dir: PathBuf,
200 max_retries: u32,
202 max_backoff: u32,
204 freshness_lifetime: Option<u64>,
210 offline: bool,
214 #[cfg(feature = "progress-bar")]
216 progress_bar: Option<ProgressBar>,
217 http_client: Client,
219}
220
221impl Cache {
222 pub fn new() -> Result<Self, Error> {
224 Cache::builder().build()
225 }
226
227 pub fn builder() -> CacheBuilder {
229 CacheBuilder::new()
230 }
231
232 pub fn cached_path(&self, resource: &str) -> Result<PathBuf, Error> {
237 self.cached_path_with_options(resource, &Options::default())
238 }
239
240 pub fn cached_path_with_options(
272 &self,
273 resource: &str,
274 options: &Options,
275 ) -> Result<PathBuf, Error> {
276 let cached_path: PathBuf;
277 let mut extraction_dir: Option<PathBuf> = None;
278
279 if !resource.starts_with("http") {
280 info!("Treating {resource} as local file");
283 cached_path = PathBuf::from(resource);
284
285 if !cached_path.is_file() {
286 return Err(Error::ResourceNotFound(String::from(resource)));
287 }
288
289 if options.extract {
290 let resource_last_modified = fs::metadata(resource)?
295 .modified()
296 .ok()
297 .and_then(|sys_time| sys_time.elapsed().ok())
298 .map(|duration| format!("{}", duration.as_secs()));
299 extraction_dir = Some(self.resource_to_filepath(
300 resource,
301 &resource_last_modified,
302 options.subdir.as_deref(),
303 Some("-extracted"),
304 ));
305 }
306 } else {
307 let meta =
309 self.fetch_remote_resource(resource, options.subdir.as_deref(), options.force)?;
310
311 if options.extract {
313 extraction_dir = Some(meta.get_extraction_path());
314 }
315
316 cached_path = meta.resource_path;
317 }
318
319 if let Some(dirpath) = extraction_dir {
320 debug!("Treating {resource} as archive");
322
323 fs::create_dir_all(dirpath.parent().unwrap())?;
324
325 debug!("Acquiring lock on extraction directory for {resource}");
328 let lock_path = format!("{}.lock", dirpath.to_str().unwrap());
329 let filelock = OpenOptions::new()
330 .read(true)
331 .write(true)
332 .create(true)
333 .truncate(true)
334 .open(lock_path)?;
335 filelock.lock_exclusive()?;
336 debug!("Lock on extraction directory acquired for {resource}");
337
338 if !dirpath.is_dir() {
339 info!("Extracting {resource} to {dirpath:?}");
340 let format = ArchiveFormat::parse_from_extension(&cached_path)?;
341 extract_archive(&cached_path, &dirpath, &format)?;
342 }
343
344 fs2::FileExt::unlock(&filelock)?;
345 debug!("Lock released on extraction directory for {resource}");
347
348 Ok(dirpath)
349 } else {
350 Ok(cached_path)
351 }
352 }
353
354 #[deprecated(
370 since = "0.4.4",
371 note = "Please use Cache::cached_path_with_options() instead"
372 )]
373 pub fn cached_path_in_subdir(
374 &self,
375 resource: &str,
376 subdir: Option<&str>,
377 ) -> Result<PathBuf, Error> {
378 let options = Options::new(subdir, false, false);
379 self.cached_path_with_options(resource, &options)
380 }
381
382 fn fetch_remote_resource(
383 &self,
384 resource: &str,
385 subdir: Option<&str>,
386 force: bool,
387 ) -> Result<Meta, Error> {
388 let url =
390 reqwest::Url::parse(resource).map_err(|_| Error::InvalidUrl(String::from(resource)))?;
391
392 if let Some(subdir_path) = subdir {
394 fs::create_dir_all(self.dir.join(subdir_path))?;
395 } else {
396 fs::create_dir_all(&self.dir)?;
397 };
398
399 if !force {
400 let versions = self.find_existing(resource, subdir); if self.offline {
404 if !versions.is_empty() {
405 info!("Found existing cached version of {resource}");
406 return Ok(versions[0].clone());
407 } else {
408 error!("Offline mode is enabled but no cached versions of resource exist.");
409 return Err(Error::NoCachedVersions(String::from(resource)));
410 }
411 } else if !versions.is_empty() && versions[0].is_fresh(self.freshness_lifetime) {
412 info!("Latest cached version of {resource} is still fresh");
414 return Ok(versions[0].clone());
415 }
416 } else if self.offline {
417 return Err(Error::ConfigurationError(
418 "'force=true' is invalid with offline mode enabled".to_string(),
419 ));
420 }
421
422 let etag = self.try_get_etag(resource, &url)?;
426 let path = self.resource_to_filepath(resource, &etag, subdir, None);
427
428 debug!("Acquiring lock for cache of {resource}");
431 let lock_path = format!("{}.lock", path.to_str().unwrap());
432 let filelock = OpenOptions::new()
433 .read(true)
434 .write(true)
435 .create(true)
436 .truncate(true)
437 .open(lock_path)?;
438 filelock.lock_exclusive()?;
439 debug!("Lock acquired for {resource}");
440
441 if path.exists() {
442 if !force {
443 info!("Cached version of {resource} is up-to-date");
447 fs2::FileExt::unlock(&filelock)?;
449 return Meta::from_cache(&path);
450 } else {
451 warn!("Forcing re-download of {resource} despite cache hit");
452 }
453 }
454
455 let meta = self.try_download_resource(resource, &url, &path, &etag)?;
457
458 info!("New version of {resource} cached");
459
460 fs2::FileExt::unlock(&filelock)?;
462 debug!("Lock released for {resource}");
463
464 Ok(meta)
465 }
466
467 fn find_existing(&self, resource: &str, subdir: Option<&str>) -> Vec<Meta> {
469 let mut existing_meta: Vec<Meta> = vec![];
470 let glob_string = format!(
471 "{}*.meta",
472 self.resource_to_filepath(resource, &None, subdir, None)
473 .to_str()
474 .unwrap(),
475 );
476 for meta_path in glob(&glob_string).unwrap().filter_map(Result::ok) {
477 if let Ok(meta) = Meta::from_path(&meta_path) {
478 existing_meta.push(meta);
479 }
480 }
481 existing_meta
482 .sort_unstable_by(|a, b| b.creation_time.partial_cmp(&a.creation_time).unwrap());
483 existing_meta
484 }
485
486 fn get_retry_delay(&self, retries: u32) -> u32 {
487 let mut rng = rand::rngs::ThreadRng::default();
488 std::cmp::min(
489 2u32.pow(retries - 1) * 1000 + rng.random_range(0..1000),
490 self.max_backoff,
491 )
492 }
493
494 fn try_download_resource(
495 &self,
496 resource: &str,
497 url: &reqwest::Url,
498 path: &Path,
499 etag: &Option<String>,
500 ) -> Result<Meta, Error> {
501 let mut retries: u32 = 0;
502 loop {
503 match self.download_resource(resource, url, path, etag) {
504 Ok(meta) => {
505 return Ok(meta);
506 }
507 Err(err) => {
508 if retries >= self.max_retries {
509 error!("Max retries exceeded for {resource}");
510 return Err(err);
511 }
512 if !err.is_retriable() {
513 error!("Download failed for {resource} with fatal error, {err}");
514 return Err(err);
515 }
516 retries += 1;
517 let retry_delay = self.get_retry_delay(retries);
518 warn!(
519 "Download failed for {resource}: {err}\nRetrying in {retry_delay} milliseconds..."
520 );
521 thread::sleep(time::Duration::from_millis(u64::from(retry_delay)));
522 }
523 }
524 }
525 }
526
527 fn download_resource(
528 &self,
529 resource: &str,
530 url: &reqwest::Url,
531 path: &Path,
532 etag: &Option<String>,
533 ) -> Result<Meta, Error> {
534 debug!("Attempting connection to {url}");
535
536 let mut response = self
537 .http_client
538 .get(url.clone())
539 .send()?
540 .error_for_status()?;
541
542 debug!("Opened connection to {url}");
543
544 let tempfile = NamedTempFile::new_in(path.parent().unwrap())?;
548 let mut tempfile_write_handle = OpenOptions::new().write(true).open(tempfile.path())?;
549
550 info!("Starting download of {url}");
551
552 #[cfg(feature = "progress-bar")]
553 let bytes = if let Some(progress_bar) = &self.progress_bar {
554 let mut download_wrapper = progress_bar.wrap_download(
555 resource,
556 response.content_length(),
557 tempfile_write_handle,
558 );
559 let bytes = response.copy_to(&mut download_wrapper)?;
560 download_wrapper.finish();
561 bytes
562 } else {
563 response.copy_to(&mut tempfile_write_handle)?
564 };
565 #[cfg(not(feature = "progress-bar"))]
566 let bytes = response.copy_to(&mut tempfile_write_handle)?;
567
568 info!("Downloaded {bytes} bytes");
569 debug!("Writing meta file");
570
571 let meta = Meta::new(
572 String::from(resource),
573 path.into(),
574 etag.clone(),
575 self.freshness_lifetime,
576 );
577 meta.to_file()?;
578
579 debug!("Renaming temp file to cache location for {url}");
580
581 fs::rename(tempfile.path(), path)?;
582
583 Ok(meta)
584 }
585
586 fn try_get_etag(&self, resource: &str, url: &reqwest::Url) -> Result<Option<String>, Error> {
587 let mut retries: u32 = 0;
588 loop {
589 match self.get_etag(url) {
590 Ok(etag) => return Ok(etag),
591 Err(err) => {
592 if retries >= self.max_retries {
593 error!("Max retries exceeded for {resource}");
594 return Err(err);
595 }
596 if !err.is_retriable() {
597 error!("ETAG fetch for {resource} failed with fatal error");
598 return Err(err);
599 }
600 retries += 1;
601 let retry_delay = self.get_retry_delay(retries);
602 warn!(
603 "ETAG fetch failed for {resource}, retrying in {retry_delay} milliseconds..."
604 );
605 thread::sleep(time::Duration::from_millis(u64::from(retry_delay)));
606 }
607 }
608 }
609 }
610
611 fn get_etag(&self, url: &reqwest::Url) -> Result<Option<String>, Error> {
612 debug!("Fetching ETAG for {url}");
613 let response = self
614 .http_client
615 .head(url.clone())
616 .send()?
617 .error_for_status()?;
618 if let Some(etag) = response.headers().get(ETAG) {
619 if let Ok(s) = etag.to_str() {
620 Ok(Some(s.into()))
621 } else {
622 debug!("No ETAG for {url}");
623 Ok(None)
624 }
625 } else {
626 Ok(None)
627 }
628 }
629
630 fn resource_to_filepath(
631 &self,
632 resource: &str,
633 etag: &Option<String>,
634 subdir: Option<&str>,
635 suffix: Option<&str>,
636 ) -> PathBuf {
637 let resource_hash = hash_str(resource);
638 let mut filename = if let Some(tag) = etag {
639 let etag_hash = hash_str(&tag[..]);
640 format!("{resource_hash}.{etag_hash}")
641 } else {
642 resource_hash
643 };
644
645 if let Some(suf) = suffix {
646 filename.push_str(suf);
647 }
648
649 let filepath = PathBuf::from(filename);
650
651 if let Some(subdir_path) = subdir {
652 self.dir.join(subdir_path).join(filepath)
653 } else {
654 self.dir.join(filepath)
655 }
656 }
657}
658
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::MAIN_SEPARATOR;
    use tempfile::{tempdir, TempDir};

    /// Resource used by every test in this module.
    const RESOURCE: &str = "http://localhost:5000/foo.txt";
    /// Expected hash of `RESOURCE`.
    const RESOURCE_HASH: &str = "b5696dbf866311125e26a62bef0125854dd40f010a70be9cfd23634c997c1874";
    /// Expected hash of the ETag `"abcd"`.
    const ETAG_HASH: &str = "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589";

    /// Builds a cache rooted in a fresh temporary directory. The directory
    /// handle is returned as well so it outlives the cache under test.
    fn temp_cache() -> (TempDir, Cache) {
        let cache_dir = tempdir().unwrap();
        let cache = Cache::builder()
            .dir(cache_dir.path().to_owned())
            .build()
            .unwrap();
        (cache_dir, cache)
    }

    #[test]
    fn test_url_to_filename_with_etag() {
        let (cache_dir, cache) = temp_cache();
        let root = cache_dir.path().to_str().unwrap();
        let etag = Some(String::from("abcd"));

        let actual = cache.resource_to_filepath(RESOURCE, &etag, None, None);
        let expected = format!("{root}{MAIN_SEPARATOR}{RESOURCE_HASH}.{ETAG_HASH}");

        assert_eq!(actual.to_str().unwrap(), expected);
    }

    #[test]
    fn test_url_to_filename_no_etag() {
        let (cache_dir, cache) = temp_cache();
        let root = cache_dir.path().to_str().unwrap();

        let actual = cache.resource_to_filepath(RESOURCE, &None, None, None);
        let expected = format!("{root}{MAIN_SEPARATOR}{RESOURCE_HASH}");

        assert_eq!(actual.to_str().unwrap(), expected);
    }

    #[test]
    fn test_url_to_filename_in_subdir() {
        let (cache_dir, cache) = temp_cache();
        let root = cache_dir.path().to_str().unwrap();

        let actual = cache.resource_to_filepath(RESOURCE, &None, Some("target"), None);
        let expected = format!("{root}{MAIN_SEPARATOR}target{MAIN_SEPARATOR}{RESOURCE_HASH}");

        assert_eq!(actual.to_str().unwrap(), expected);
    }

    #[test]
    fn test_url_to_filename_with_suffix() {
        let (cache_dir, cache) = temp_cache();
        let root = cache_dir.path().to_str().unwrap();

        let actual =
            cache.resource_to_filepath(RESOURCE, &None, Some("target"), Some("-extracted"));
        let expected =
            format!("{root}{MAIN_SEPARATOR}target{MAIN_SEPARATOR}{RESOURCE_HASH}-extracted");

        assert_eq!(actual.to_str().unwrap(), expected);
    }
}