1use fs2::FileExt;
2use glob::glob;
3use log::{debug, error, info, warn};
4use rand::distributions::{Distribution, Uniform};
5use reqwest::blocking::{Client, ClientBuilder};
6use reqwest::header::ETAG;
7use std::default::Default;
8use std::env;
9use std::fs::{self, OpenOptions};
10use std::path::{Path, PathBuf};
11use std::thread;
12use std::time::{self, Duration};
13use tempfile::NamedTempFile;
14
15use crate::archives::{extract_archive, ArchiveFormat};
16use crate::utils::hash_str;
17#[cfg(feature = "progress-bar")]
18use crate::ProgressBar;
19use crate::{meta::Meta, Error};
20
/// Builder used to configure and construct a [`Cache`].
///
/// Every setter consumes the builder and hands it back, so calls can be chained
/// before finishing with [`CacheBuilder::build`].
#[derive(Debug)]
pub struct CacheBuilder {
    // Settings collected so far; consumed by `build`.
    config: Config,
}
26
/// Settings accumulated by [`CacheBuilder`] before a [`Cache`] is built.
#[derive(Debug)]
struct Config {
    // Cache root directory; when `None`, `CacheBuilder::build` picks a default.
    dir: Option<PathBuf>,
    // Builder for the blocking HTTP client handed to the finished `Cache`.
    client_builder: ClientBuilder,
    // Maximum number of retries after a failed HTTP request.
    max_retries: u32,
    // Upper bound on the delay between retries, in milliseconds.
    max_backoff: u32,
    // Freshness lifetime forwarded to cached entries.
    // NOTE(review): presumably expressed in seconds — confirm against `Meta`.
    freshness_lifetime: Option<u64>,
    // When `true`, remote resources are only served from already-cached versions.
    offline: bool,
    // Progress bar used while downloading; `None` disables it.
    #[cfg(feature = "progress-bar")]
    progress_bar: Option<ProgressBar>,
}
38
39impl CacheBuilder {
40 pub fn new() -> CacheBuilder {
42 CacheBuilder {
43 config: Config {
44 dir: None,
45 client_builder: ClientBuilder::new().timeout(None),
46 max_retries: 3,
47 max_backoff: 5000,
48 freshness_lifetime: None,
49 offline: false,
50 #[cfg(feature = "progress-bar")]
51 progress_bar: Some(ProgressBar::default()),
52 },
53 }
54 }
55
56 pub fn with_client_builder(client_builder: ClientBuilder) -> CacheBuilder {
58 CacheBuilder::new().client_builder(client_builder)
59 }
60
61 pub fn dir(mut self, dir: PathBuf) -> CacheBuilder {
65 self.config.dir = Some(dir);
66 self
67 }
68
69 pub fn client_builder(mut self, client_builder: ClientBuilder) -> CacheBuilder {
71 self.config.client_builder = client_builder;
72 self
73 }
74
75 pub fn timeout(mut self, timeout: Duration) -> CacheBuilder {
77 self.config.client_builder = self.config.client_builder.timeout(timeout);
78 self
79 }
80
81 pub fn connect_timeout(mut self, timeout: Duration) -> CacheBuilder {
83 self.config.client_builder = self.config.client_builder.connect_timeout(timeout);
84 self
85 }
86
87 pub fn max_retries(mut self, max_retries: u32) -> CacheBuilder {
89 self.config.max_retries = max_retries;
90 self
91 }
92
93 pub fn max_backoff(mut self, max_backoff: u32) -> CacheBuilder {
95 self.config.max_backoff = max_backoff;
96 self
97 }
98
99 pub fn freshness_lifetime(mut self, freshness_lifetime: u64) -> CacheBuilder {
102 self.config.freshness_lifetime = Some(freshness_lifetime);
103 self
104 }
105
106 pub fn offline(mut self, offline: bool) -> CacheBuilder {
113 self.config.offline = offline;
114 self
115 }
116
117 #[cfg(feature = "progress-bar")]
121 pub fn progress_bar(mut self, progress_bar: Option<ProgressBar>) -> CacheBuilder {
122 self.config.progress_bar = progress_bar;
123 self
124 }
125
126 pub fn build(self) -> Result<Cache, Error> {
128 let dir = self.config.dir.unwrap_or_else(|| {
129 if let Some(dir_str) = env::var_os("RUST_CACHED_PATH_ROOT") {
130 PathBuf::from(dir_str)
131 } else {
132 env::temp_dir().join("cache/")
133 }
134 });
135 let http_client = self.config.client_builder.build()?;
136 fs::create_dir_all(&dir)?;
137 Ok(Cache {
138 dir,
139 http_client,
140 max_retries: self.config.max_retries,
141 max_backoff: self.config.max_backoff,
142 freshness_lifetime: self.config.freshness_lifetime,
143 offline: self.config.offline,
144 #[cfg(feature = "progress-bar")]
145 progress_bar: self.config.progress_bar,
146 })
147 }
148}
149
150impl Default for CacheBuilder {
151 fn default() -> Self {
152 Self::new()
153 }
154}
155
/// Per-call options for [`Cache::cached_path_with_options`].
///
/// Start from [`Options::default`] (no subdirectory, no extraction, no forced
/// download) and chain the builder-style methods, or fill everything in at
/// once with [`Options::new`].
#[derive(Debug, Clone, Default)]
pub struct Options {
    /// Optional subdirectory of the cache root in which to store the resource.
    pub subdir: Option<String>,
    /// When `true`, the resource is treated as an archive and extracted.
    pub extract: bool,
    /// When `true`, a remote resource is downloaded again even if a cached
    /// copy already exists.
    pub force: bool,
}

impl Options {
    /// Creates options with every field given explicitly.
    pub fn new(subdir: Option<&str>, extract: bool, force: bool) -> Self {
        Self {
            subdir: subdir.map(String::from),
            extract,
            force,
        }
    }

    /// Stores the resource in `subdir` (relative to the cache root).
    pub fn subdir(mut self, subdir: &str) -> Self {
        self.subdir = Some(subdir.into());
        self
    }

    /// Treats the resource as an archive and extracts it.
    pub fn extract(mut self) -> Self {
        self.extract = true;
        self
    }

    /// Forces a fresh download of a remote resource.
    pub fn force(mut self) -> Self {
        self.force = true;
        self
    }
}
194
/// Fetches remote resources over HTTP, stores them under a cache directory and
/// optionally extracts archives next to them.
#[derive(Debug, Clone)]
pub struct Cache {
    /// Root directory of the cache.
    pub dir: PathBuf,
    // Maximum number of retries after a failed HTTP request.
    max_retries: u32,
    // Upper bound on the delay between retries, in milliseconds.
    max_backoff: u32,
    // Freshness lifetime used to decide whether the latest cached version can
    // be returned as is. NOTE(review): presumably seconds — confirm against `Meta`.
    freshness_lifetime: Option<u64>,
    // When `true`, remote resources are only served from already-cached versions.
    offline: bool,
    // Progress bar used while downloading; `None` disables it.
    #[cfg(feature = "progress-bar")]
    progress_bar: Option<ProgressBar>,
    // Blocking HTTP client used for the HEAD (ETag) and GET (download) requests.
    http_client: Client,
}
220
221impl Cache {
222 pub fn new() -> Result<Self, Error> {
224 Cache::builder().build()
225 }
226
227 pub fn builder() -> CacheBuilder {
229 CacheBuilder::new()
230 }
231
232 pub fn cached_path(&self, resource: &str) -> Result<PathBuf, Error> {
237 self.cached_path_with_options(resource, &Options::default())
238 }
239
240 pub fn cached_path_with_options(
272 &self,
273 resource: &str,
274 options: &Options,
275 ) -> Result<PathBuf, Error> {
276 let cached_path: PathBuf;
277 let mut extraction_dir: Option<PathBuf> = None;
278
279 if !resource.starts_with("http") {
280 info!("Treating {resource} as local file");
283 cached_path = PathBuf::from(resource);
284
285 if !cached_path.is_file() {
286 return Err(Error::ResourceNotFound(String::from(resource)));
287 }
288
289 if options.extract {
290 let resource_last_modified = fs::metadata(resource)?
295 .modified()
296 .ok()
297 .and_then(|sys_time| sys_time.elapsed().ok())
298 .map(|duration| format!("{}", duration.as_secs()));
299 extraction_dir = Some(self.resource_to_filepath(
300 resource,
301 &resource_last_modified,
302 options.subdir.as_deref(),
303 Some("-extracted"),
304 ));
305 }
306 } else {
307 let meta =
309 self.fetch_remote_resource(resource, options.subdir.as_deref(), options.force)?;
310
311 if options.extract {
313 extraction_dir = Some(meta.get_extraction_path());
314 }
315
316 cached_path = meta.resource_path;
317 }
318
319 if let Some(dirpath) = extraction_dir {
320 debug!("Treating {resource} as archive");
322
323 fs::create_dir_all(dirpath.parent().unwrap())?;
324
325 debug!("Acquiring lock on extraction directory for {resource}");
328 let lock_path = format!("{}.lock", dirpath.to_str().unwrap());
329 let filelock = OpenOptions::new()
330 .read(true)
331 .write(true)
332 .create(true)
333 .truncate(true)
334 .open(lock_path)?;
335 filelock.lock_exclusive()?;
336 debug!("Lock on extraction directory acquired for {resource}");
337
338 if !dirpath.is_dir() {
339 info!("Extracting {resource} to {dirpath:?}");
340 let format = ArchiveFormat::parse_from_extension(&cached_path)?;
341 extract_archive(&cached_path, &dirpath, &format)?;
342 }
343
344 fs2::FileExt::unlock(&filelock)?;
345 debug!("Lock released on extraction directory for {resource}");
347
348 Ok(dirpath)
349 } else {
350 Ok(cached_path)
351 }
352 }
353
354 #[deprecated(
370 since = "0.4.4",
371 note = "Please use Cache::cached_path_with_options() instead"
372 )]
373 pub fn cached_path_in_subdir(
374 &self,
375 resource: &str,
376 subdir: Option<&str>,
377 ) -> Result<PathBuf, Error> {
378 let options = Options::new(subdir, false, false);
379 self.cached_path_with_options(resource, &options)
380 }
381
382 fn fetch_remote_resource(
383 &self,
384 resource: &str,
385 subdir: Option<&str>,
386 force: bool,
387 ) -> Result<Meta, Error> {
388 let url =
390 reqwest::Url::parse(resource).map_err(|_| Error::InvalidUrl(String::from(resource)))?;
391
392 if let Some(subdir_path) = subdir {
394 fs::create_dir_all(self.dir.join(subdir_path))?;
395 } else {
396 fs::create_dir_all(&self.dir)?;
397 };
398
399 if !force {
400 let versions = self.find_existing(resource, subdir); if self.offline {
404 if !versions.is_empty() {
405 info!("Found existing cached version of {resource}");
406 return Ok(versions[0].clone());
407 } else {
408 error!("Offline mode is enabled but no cached versions of resource exist.");
409 return Err(Error::NoCachedVersions(String::from(resource)));
410 }
411 } else if !versions.is_empty() && versions[0].is_fresh(self.freshness_lifetime) {
412 info!("Latest cached version of {resource} is still fresh");
414 return Ok(versions[0].clone());
415 }
416 } else if self.offline {
417 return Err(Error::ConfigurationError(
418 "'force=true' is invalid with offline mode enabled".to_string(),
419 ));
420 }
421
422 let etag = self.try_get_etag(resource, &url)?;
426 let path = self.resource_to_filepath(resource, &etag, subdir, None);
427
428 debug!("Acquiring lock for cache of {resource}");
431 let lock_path = format!("{}.lock", path.to_str().unwrap());
432 let filelock = OpenOptions::new()
433 .read(true)
434 .write(true)
435 .create(true)
436 .truncate(true)
437 .open(lock_path)?;
438 filelock.lock_exclusive()?;
439 debug!("Lock acquired for {resource}");
440
441 if path.exists() {
442 if !force {
443 info!("Cached version of {resource} is up-to-date");
447 fs2::FileExt::unlock(&filelock)?;
449 return Meta::from_cache(&path);
450 } else {
451 warn!("Forcing re-download of {resource} despite cache hit");
452 }
453 }
454
455 let meta = self.try_download_resource(resource, &url, &path, &etag)?;
457
458 info!("New version of {resource} cached");
459
460 fs2::FileExt::unlock(&filelock)?;
462 debug!("Lock released for {resource}");
463
464 Ok(meta)
465 }
466
467 fn find_existing(&self, resource: &str, subdir: Option<&str>) -> Vec<Meta> {
469 let mut existing_meta: Vec<Meta> = vec![];
470 let glob_string = format!(
471 "{}*.meta",
472 self.resource_to_filepath(resource, &None, subdir, None)
473 .to_str()
474 .unwrap(),
475 );
476 for meta_path in glob(&glob_string).unwrap().filter_map(Result::ok) {
477 if let Ok(meta) = Meta::from_path(&meta_path) {
478 existing_meta.push(meta);
479 }
480 }
481 existing_meta
482 .sort_unstable_by(|a, b| b.creation_time.partial_cmp(&a.creation_time).unwrap());
483 existing_meta
484 }
485
486 fn get_retry_delay(&self, retries: u32) -> u32 {
487 let between = Uniform::from(0..1000);
488 let mut rng = rand::thread_rng();
489 std::cmp::min(
490 2u32.pow(retries - 1) * 1000 + between.sample(&mut rng),
491 self.max_backoff,
492 )
493 }
494
495 fn try_download_resource(
496 &self,
497 resource: &str,
498 url: &reqwest::Url,
499 path: &Path,
500 etag: &Option<String>,
501 ) -> Result<Meta, Error> {
502 let mut retries: u32 = 0;
503 loop {
504 match self.download_resource(resource, url, path, etag) {
505 Ok(meta) => {
506 return Ok(meta);
507 }
508 Err(err) => {
509 if retries >= self.max_retries {
510 error!("Max retries exceeded for {resource}");
511 return Err(err);
512 }
513 if !err.is_retriable() {
514 error!("Download failed for {resource} with fatal error, {err}");
515 return Err(err);
516 }
517 retries += 1;
518 let retry_delay = self.get_retry_delay(retries);
519 warn!(
520 "Download failed for {resource}: {err}\nRetrying in {retry_delay} milliseconds..."
521 );
522 thread::sleep(time::Duration::from_millis(u64::from(retry_delay)));
523 }
524 }
525 }
526 }
527
528 fn download_resource(
529 &self,
530 resource: &str,
531 url: &reqwest::Url,
532 path: &Path,
533 etag: &Option<String>,
534 ) -> Result<Meta, Error> {
535 debug!("Attempting connection to {url}");
536
537 let mut response = self
538 .http_client
539 .get(url.clone())
540 .send()?
541 .error_for_status()?;
542
543 debug!("Opened connection to {url}");
544
545 let tempfile = NamedTempFile::new_in(path.parent().unwrap())?;
549 let mut tempfile_write_handle = OpenOptions::new().write(true).open(tempfile.path())?;
550
551 info!("Starting download of {url}");
552
553 #[cfg(feature = "progress-bar")]
554 let bytes = if let Some(progress_bar) = &self.progress_bar {
555 let mut download_wrapper = progress_bar.wrap_download(
556 resource,
557 response.content_length(),
558 tempfile_write_handle,
559 );
560 let bytes = response.copy_to(&mut download_wrapper)?;
561 download_wrapper.finish();
562 bytes
563 } else {
564 response.copy_to(&mut tempfile_write_handle)?
565 };
566 #[cfg(not(feature = "progress-bar"))]
567 let bytes = response.copy_to(&mut tempfile_write_handle)?;
568
569 info!("Downloaded {bytes} bytes");
570 debug!("Writing meta file");
571
572 let meta = Meta::new(
573 String::from(resource),
574 path.into(),
575 etag.clone(),
576 self.freshness_lifetime,
577 );
578 meta.to_file()?;
579
580 debug!("Renaming temp file to cache location for {url}");
581
582 fs::rename(tempfile.path(), path)?;
583
584 Ok(meta)
585 }
586
587 fn try_get_etag(&self, resource: &str, url: &reqwest::Url) -> Result<Option<String>, Error> {
588 let mut retries: u32 = 0;
589 loop {
590 match self.get_etag(url) {
591 Ok(etag) => return Ok(etag),
592 Err(err) => {
593 if retries >= self.max_retries {
594 error!("Max retries exceeded for {resource}");
595 return Err(err);
596 }
597 if !err.is_retriable() {
598 error!("ETAG fetch for {resource} failed with fatal error");
599 return Err(err);
600 }
601 retries += 1;
602 let retry_delay = self.get_retry_delay(retries);
603 warn!(
604 "ETAG fetch failed for {resource}, retrying in {retry_delay} milliseconds..."
605 );
606 thread::sleep(time::Duration::from_millis(u64::from(retry_delay)));
607 }
608 }
609 }
610 }
611
612 fn get_etag(&self, url: &reqwest::Url) -> Result<Option<String>, Error> {
613 debug!("Fetching ETAG for {url}");
614 let response = self
615 .http_client
616 .head(url.clone())
617 .send()?
618 .error_for_status()?;
619 if let Some(etag) = response.headers().get(ETAG) {
620 if let Ok(s) = etag.to_str() {
621 Ok(Some(s.into()))
622 } else {
623 debug!("No ETAG for {url}");
624 Ok(None)
625 }
626 } else {
627 Ok(None)
628 }
629 }
630
631 fn resource_to_filepath(
632 &self,
633 resource: &str,
634 etag: &Option<String>,
635 subdir: Option<&str>,
636 suffix: Option<&str>,
637 ) -> PathBuf {
638 let resource_hash = hash_str(resource);
639 let mut filename = if let Some(tag) = etag {
640 let etag_hash = hash_str(&tag[..]);
641 format!("{resource_hash}.{etag_hash}")
642 } else {
643 resource_hash
644 };
645
646 if let Some(suf) = suffix {
647 filename.push_str(suf);
648 }
649
650 let filepath = PathBuf::from(filename);
651
652 if let Some(subdir_path) = subdir {
653 self.dir.join(subdir_path).join(filepath)
654 } else {
655 self.dir.join(filepath)
656 }
657 }
658}
659
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // Resource used by every test, together with the expected hashes of the
    // resource and of the ETag "abcd".
    const RESOURCE: &str = "http://localhost:5000/foo.txt";
    const RESOURCE_HASH: &str = "b5696dbf866311125e26a62bef0125854dd40f010a70be9cfd23634c997c1874";
    const ETAG_HASH: &str = "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589";

    // Builds a cache rooted in a fresh temporary directory. The directory
    // guard is returned as well so that it outlives the cache.
    fn temp_cache() -> (tempfile::TempDir, Cache) {
        let cache_dir = tempdir().unwrap();
        let cache = Cache::builder()
            .dir(cache_dir.path().to_owned())
            .build()
            .unwrap();
        (cache_dir, cache)
    }

    // Joins `root` and `components` with the platform's path separator.
    fn expected_path(root: &std::path::Path, components: &[&str]) -> String {
        let mut expected = String::from(root.to_str().unwrap());
        for component in components {
            expected.push(std::path::MAIN_SEPARATOR);
            expected.push_str(component);
        }
        expected
    }

    #[test]
    fn test_url_to_filename_with_etag() {
        let (cache_dir, cache) = temp_cache();
        let etag = String::from("abcd");

        let actual = cache.resource_to_filepath(RESOURCE, &Some(etag), None, None);
        let filename = format!("{}.{}", RESOURCE_HASH, ETAG_HASH);

        assert_eq!(
            actual.to_str().unwrap(),
            expected_path(cache_dir.path(), &[filename.as_str()])
        );
    }

    #[test]
    fn test_url_to_filename_no_etag() {
        let (cache_dir, cache) = temp_cache();

        let actual = cache.resource_to_filepath(RESOURCE, &None, None, None);

        assert_eq!(
            actual.to_str().unwrap(),
            expected_path(cache_dir.path(), &[RESOURCE_HASH])
        );
    }

    #[test]
    fn test_url_to_filename_in_subdir() {
        let (cache_dir, cache) = temp_cache();

        let actual = cache.resource_to_filepath(RESOURCE, &None, Some("target"), None);

        assert_eq!(
            actual.to_str().unwrap(),
            expected_path(cache_dir.path(), &["target", RESOURCE_HASH])
        );
    }

    #[test]
    fn test_url_to_filename_with_suffix() {
        let (cache_dir, cache) = temp_cache();

        let actual =
            cache.resource_to_filepath(RESOURCE, &None, Some("target"), Some("-extracted"));
        let filename = format!("{}-extracted", RESOURCE_HASH);

        assert_eq!(
            actual.to_str().unwrap(),
            expected_path(cache_dir.path(), &["target", filename.as_str()])
        );
    }
}