1use fs2::FileExt;
2use glob::glob;
3use log::{debug, error, info, warn};
4use rand::distributions::{Distribution, Uniform};
5use reqwest::blocking::{Client, ClientBuilder};
6use reqwest::header::ETAG;
7use std::default::Default;
8use std::env;
9use std::fs::{self, OpenOptions};
10use std::path::{Path, PathBuf};
11use std::thread;
12use std::time::{self, Duration};
13use tempfile::NamedTempFile;
14
15use crate::archives::{extract_archive, ArchiveFormat};
16use crate::utils::hash_str;
17use crate::{meta::Meta, Error, ProgressBar};
18
/// Builder used to configure and construct a [`Cache`].
///
/// Settings are accumulated in a private [`Config`] and turned into a
/// [`Cache`] by [`CacheBuilder::build`].
#[derive(Debug)]
pub struct CacheBuilder {
    /// Settings collected so far.
    config: Config,
}
24
/// Settings gathered by [`CacheBuilder`] before the [`Cache`] is built.
#[derive(Debug)]
struct Config {
    /// Root directory of the cache. When `None`, `CacheBuilder::build` uses the
    /// `RUST_CACHED_PATH_ROOT` environment variable if set, otherwise a `cache/`
    /// directory under the system temp directory.
    dir: Option<PathBuf>,
    /// Builder for the blocking HTTP client the cache will use.
    client_builder: ClientBuilder,
    /// Maximum number of retries after a failed HTTP operation.
    max_retries: u32,
    /// Upper bound on the delay between retries, in milliseconds.
    max_backoff: u32,
    /// Freshness lifetime handed to `Meta`; `None` means unset.
    /// NOTE(review): presumably expressed in seconds — confirm against `Meta::is_fresh`.
    freshness_lifetime: Option<u64>,
    /// When `true`, remote resources are only served from already-cached versions.
    offline: bool,
    /// Progress bar used to wrap downloads; `None` downloads without one.
    progress_bar: Option<ProgressBar>,
}
35
36impl CacheBuilder {
37 pub fn new() -> CacheBuilder {
39 CacheBuilder {
40 config: Config {
41 dir: None,
42 client_builder: ClientBuilder::new().timeout(None),
43 max_retries: 3,
44 max_backoff: 5000,
45 freshness_lifetime: None,
46 offline: false,
47 progress_bar: Some(ProgressBar::default()),
48 },
49 }
50 }
51
52 pub fn with_client_builder(client_builder: ClientBuilder) -> CacheBuilder {
54 CacheBuilder::new().client_builder(client_builder)
55 }
56
57 pub fn dir(mut self, dir: PathBuf) -> CacheBuilder {
61 self.config.dir = Some(dir);
62 self
63 }
64
65 pub fn client_builder(mut self, client_builder: ClientBuilder) -> CacheBuilder {
67 self.config.client_builder = client_builder;
68 self
69 }
70
71 pub fn timeout(mut self, timeout: Duration) -> CacheBuilder {
73 self.config.client_builder = self.config.client_builder.timeout(timeout);
74 self
75 }
76
77 pub fn connect_timeout(mut self, timeout: Duration) -> CacheBuilder {
79 self.config.client_builder = self.config.client_builder.connect_timeout(timeout);
80 self
81 }
82
83 pub fn max_retries(mut self, max_retries: u32) -> CacheBuilder {
85 self.config.max_retries = max_retries;
86 self
87 }
88
89 pub fn max_backoff(mut self, max_backoff: u32) -> CacheBuilder {
91 self.config.max_backoff = max_backoff;
92 self
93 }
94
95 pub fn freshness_lifetime(mut self, freshness_lifetime: u64) -> CacheBuilder {
98 self.config.freshness_lifetime = Some(freshness_lifetime);
99 self
100 }
101
102 pub fn offline(mut self, offline: bool) -> CacheBuilder {
109 self.config.offline = offline;
110 self
111 }
112
113 pub fn progress_bar(mut self, progress_bar: Option<ProgressBar>) -> CacheBuilder {
117 self.config.progress_bar = progress_bar;
118 self
119 }
120
121 pub fn build(self) -> Result<Cache, Error> {
123 let dir = self.config.dir.unwrap_or_else(|| {
124 if let Some(dir_str) = env::var_os("RUST_CACHED_PATH_ROOT") {
125 PathBuf::from(dir_str)
126 } else {
127 env::temp_dir().join("cache/")
128 }
129 });
130 let http_client = self.config.client_builder.build()?;
131 fs::create_dir_all(&dir)?;
132 Ok(Cache {
133 dir,
134 http_client,
135 max_retries: self.config.max_retries,
136 max_backoff: self.config.max_backoff,
137 freshness_lifetime: self.config.freshness_lifetime,
138 offline: self.config.offline,
139 progress_bar: self.config.progress_bar,
140 })
141 }
142}
143
144impl Default for CacheBuilder {
145 fn default() -> Self {
146 Self::new()
147 }
148}
149
/// Per-call options for `Cache::cached_path_with_options`.
#[derive(Default)]
pub struct Options {
    /// Optional subdirectory of the cache root to place the resource in.
    pub subdir: Option<String>,
    /// Treat the resource as an archive and return the extraction directory.
    pub extract: bool,
    /// Re-download a remote resource even when a cached copy exists.
    pub force: bool,
}

impl Options {
    /// Create options with every field given explicitly.
    pub fn new(subdir: Option<&str>, extract: bool, force: bool) -> Self {
        let subdir = subdir.map(str::to_owned);
        Self {
            subdir,
            extract,
            force,
        }
    }

    /// Return these options with the cache subdirectory set to `subdir`.
    pub fn subdir(self, subdir: &str) -> Self {
        Self {
            subdir: Some(subdir.to_owned()),
            ..self
        }
    }

    /// Return these options with extraction turned on.
    pub fn extract(self) -> Self {
        Self {
            extract: true,
            ..self
        }
    }

    /// Return these options with forced re-download turned on.
    pub fn force(self) -> Self {
        Self {
            force: true,
            ..self
        }
    }
}
188
/// Fetches and caches resources, handing back file-system paths to them.
#[derive(Debug, Clone)]
pub struct Cache {
    /// Root directory under which cached files are stored.
    pub dir: PathBuf,
    /// Maximum number of retries after a failed HTTP operation.
    max_retries: u32,
    /// Upper bound on the delay between retries, in milliseconds.
    max_backoff: u32,
    /// Freshness lifetime handed to `Meta`; `None` means unset.
    /// NOTE(review): presumably expressed in seconds — confirm against `Meta::is_fresh`.
    freshness_lifetime: Option<u64>,
    /// When `true`, remote resources are only served from already-cached versions.
    offline: bool,
    /// Progress bar used to wrap downloads; `None` downloads without one.
    progress_bar: Option<ProgressBar>,
    /// Blocking HTTP client used for HEAD (ETag) and GET (download) requests.
    http_client: Client,
}
213
214impl Cache {
215 pub fn new() -> Result<Self, Error> {
217 Cache::builder().build()
218 }
219
220 pub fn builder() -> CacheBuilder {
222 CacheBuilder::new()
223 }
224
225 pub fn cached_path(&self, resource: &str) -> Result<PathBuf, Error> {
230 self.cached_path_with_options(resource, &Options::default())
231 }
232
233 pub fn cached_path_with_options(
265 &self,
266 resource: &str,
267 options: &Options,
268 ) -> Result<PathBuf, Error> {
269 let cached_path: PathBuf;
270 let mut extraction_dir: Option<PathBuf> = None;
271
272 if !resource.starts_with("http") {
273 info!("Treating {resource} as local file");
276 cached_path = PathBuf::from(resource);
277
278 if !cached_path.is_file() {
279 return Err(Error::ResourceNotFound(String::from(resource)));
280 }
281
282 if options.extract {
283 let resource_last_modified = fs::metadata(resource)?
288 .modified()
289 .ok()
290 .and_then(|sys_time| sys_time.elapsed().ok())
291 .map(|duration| format!("{}", duration.as_secs()));
292 extraction_dir = Some(self.resource_to_filepath(
293 resource,
294 &resource_last_modified,
295 options.subdir.as_deref(),
296 Some("-extracted"),
297 ));
298 }
299 } else {
300 let meta =
302 self.fetch_remote_resource(resource, options.subdir.as_deref(), options.force)?;
303
304 if options.extract {
306 extraction_dir = Some(meta.get_extraction_path());
307 }
308
309 cached_path = meta.resource_path;
310 }
311
312 if let Some(dirpath) = extraction_dir {
313 debug!("Treating {resource} as archive");
315
316 fs::create_dir_all(dirpath.parent().unwrap())?;
317
318 debug!("Acquiring lock on extraction directory for {resource}");
321 let lock_path = format!("{}.lock", dirpath.to_str().unwrap());
322 let filelock = OpenOptions::new()
323 .read(true)
324 .write(true)
325 .create(true)
326 .truncate(true)
327 .open(lock_path)?;
328 filelock.lock_exclusive()?;
329 debug!("Lock on extraction directory acquired for {resource}");
330
331 if !dirpath.is_dir() {
332 info!("Extracting {resource} to {dirpath:?}");
333 let format = ArchiveFormat::parse_from_extension(&cached_path)?;
334 extract_archive(&cached_path, &dirpath, &format)?;
335 }
336
337 fs2::FileExt::unlock(&filelock)?;
338 debug!("Lock released on extraction directory for {resource}");
340
341 Ok(dirpath)
342 } else {
343 Ok(cached_path)
344 }
345 }
346
347 #[deprecated(
363 since = "0.4.4",
364 note = "Please use Cache::cached_path_with_options() instead"
365 )]
366 pub fn cached_path_in_subdir(
367 &self,
368 resource: &str,
369 subdir: Option<&str>,
370 ) -> Result<PathBuf, Error> {
371 let options = Options::new(subdir, false, false);
372 self.cached_path_with_options(resource, &options)
373 }
374
375 fn fetch_remote_resource(
376 &self,
377 resource: &str,
378 subdir: Option<&str>,
379 force: bool,
380 ) -> Result<Meta, Error> {
381 let url =
383 reqwest::Url::parse(resource).map_err(|_| Error::InvalidUrl(String::from(resource)))?;
384
385 if let Some(subdir_path) = subdir {
387 fs::create_dir_all(self.dir.join(subdir_path))?;
388 } else {
389 fs::create_dir_all(&self.dir)?;
390 };
391
392 if !force {
393 let versions = self.find_existing(resource, subdir); if self.offline {
397 if !versions.is_empty() {
398 info!("Found existing cached version of {resource}");
399 return Ok(versions[0].clone());
400 } else {
401 error!("Offline mode is enabled but no cached versions of resource exist.");
402 return Err(Error::NoCachedVersions(String::from(resource)));
403 }
404 } else if !versions.is_empty() && versions[0].is_fresh(self.freshness_lifetime) {
405 info!("Latest cached version of {resource} is still fresh");
407 return Ok(versions[0].clone());
408 }
409 } else if self.offline {
410 return Err(Error::ConfigurationError(
411 "'force=true' is invalid with offline mode enabled".to_string(),
412 ));
413 }
414
415 let etag = self.try_get_etag(resource, &url)?;
419 let path = self.resource_to_filepath(resource, &etag, subdir, None);
420
421 debug!("Acquiring lock for cache of {resource}");
424 let lock_path = format!("{}.lock", path.to_str().unwrap());
425 let filelock = OpenOptions::new()
426 .read(true)
427 .write(true)
428 .create(true)
429 .truncate(true)
430 .open(lock_path)?;
431 filelock.lock_exclusive()?;
432 debug!("Lock acquired for {resource}");
433
434 if path.exists() {
435 if !force {
436 info!("Cached version of {resource} is up-to-date");
440 fs2::FileExt::unlock(&filelock)?;
442 return Meta::from_cache(&path);
443 } else {
444 warn!("Forcing re-download of {resource} despite cache hit");
445 }
446 }
447
448 let meta = self.try_download_resource(resource, &url, &path, &etag)?;
450
451 info!("New version of {resource} cached");
452
453 fs2::FileExt::unlock(&filelock)?;
455 debug!("Lock released for {resource}");
456
457 Ok(meta)
458 }
459
460 fn find_existing(&self, resource: &str, subdir: Option<&str>) -> Vec<Meta> {
462 let mut existing_meta: Vec<Meta> = vec![];
463 let glob_string = format!(
464 "{}*.meta",
465 self.resource_to_filepath(resource, &None, subdir, None)
466 .to_str()
467 .unwrap(),
468 );
469 for meta_path in glob(&glob_string).unwrap().filter_map(Result::ok) {
470 if let Ok(meta) = Meta::from_path(&meta_path) {
471 existing_meta.push(meta);
472 }
473 }
474 existing_meta
475 .sort_unstable_by(|a, b| b.creation_time.partial_cmp(&a.creation_time).unwrap());
476 existing_meta
477 }
478
479 fn get_retry_delay(&self, retries: u32) -> u32 {
480 let between = Uniform::from(0..1000);
481 let mut rng = rand::thread_rng();
482 std::cmp::min(
483 2u32.pow(retries - 1) * 1000 + between.sample(&mut rng),
484 self.max_backoff,
485 )
486 }
487
488 fn try_download_resource(
489 &self,
490 resource: &str,
491 url: &reqwest::Url,
492 path: &Path,
493 etag: &Option<String>,
494 ) -> Result<Meta, Error> {
495 let mut retries: u32 = 0;
496 loop {
497 match self.download_resource(resource, url, path, etag) {
498 Ok(meta) => {
499 return Ok(meta);
500 }
501 Err(err) => {
502 if retries >= self.max_retries {
503 error!("Max retries exceeded for {resource}");
504 return Err(err);
505 }
506 if !err.is_retriable() {
507 error!("Download failed for {resource} with fatal error, {err}");
508 return Err(err);
509 }
510 retries += 1;
511 let retry_delay = self.get_retry_delay(retries);
512 warn!(
513 "Download failed for {resource}: {err}\nRetrying in {retry_delay} milliseconds..."
514 );
515 thread::sleep(time::Duration::from_millis(u64::from(retry_delay)));
516 }
517 }
518 }
519 }
520
521 fn download_resource(
522 &self,
523 resource: &str,
524 url: &reqwest::Url,
525 path: &Path,
526 etag: &Option<String>,
527 ) -> Result<Meta, Error> {
528 debug!("Attempting connection to {url}");
529
530 let mut response = self
531 .http_client
532 .get(url.clone())
533 .send()?
534 .error_for_status()?;
535
536 debug!("Opened connection to {url}");
537
538 let tempfile = NamedTempFile::new_in(path.parent().unwrap())?;
542 let mut tempfile_write_handle = OpenOptions::new().write(true).open(tempfile.path())?;
543
544 info!("Starting download of {url}");
545
546 let bytes = if let Some(progress_bar) = &self.progress_bar {
547 let mut download_wrapper = progress_bar.wrap_download(
548 resource,
549 response.content_length(),
550 tempfile_write_handle,
551 );
552 let bytes = response.copy_to(&mut download_wrapper)?;
553 download_wrapper.finish();
554 bytes
555 } else {
556 response.copy_to(&mut tempfile_write_handle)?
557 };
558
559 info!("Downloaded {bytes} bytes");
560 debug!("Writing meta file");
561
562 let meta = Meta::new(
563 String::from(resource),
564 path.into(),
565 etag.clone(),
566 self.freshness_lifetime,
567 );
568 meta.to_file()?;
569
570 debug!("Renaming temp file to cache location for {url}");
571
572 fs::rename(tempfile.path(), path)?;
573
574 Ok(meta)
575 }
576
577 fn try_get_etag(&self, resource: &str, url: &reqwest::Url) -> Result<Option<String>, Error> {
578 let mut retries: u32 = 0;
579 loop {
580 match self.get_etag(url) {
581 Ok(etag) => return Ok(etag),
582 Err(err) => {
583 if retries >= self.max_retries {
584 error!("Max retries exceeded for {resource}");
585 return Err(err);
586 }
587 if !err.is_retriable() {
588 error!("ETAG fetch for {resource} failed with fatal error");
589 return Err(err);
590 }
591 retries += 1;
592 let retry_delay = self.get_retry_delay(retries);
593 warn!(
594 "ETAG fetch failed for {resource}, retrying in {retry_delay} milliseconds..."
595 );
596 thread::sleep(time::Duration::from_millis(u64::from(retry_delay)));
597 }
598 }
599 }
600 }
601
602 fn get_etag(&self, url: &reqwest::Url) -> Result<Option<String>, Error> {
603 debug!("Fetching ETAG for {url}");
604 let response = self
605 .http_client
606 .head(url.clone())
607 .send()?
608 .error_for_status()?;
609 if let Some(etag) = response.headers().get(ETAG) {
610 if let Ok(s) = etag.to_str() {
611 Ok(Some(s.into()))
612 } else {
613 debug!("No ETAG for {url}");
614 Ok(None)
615 }
616 } else {
617 Ok(None)
618 }
619 }
620
621 fn resource_to_filepath(
622 &self,
623 resource: &str,
624 etag: &Option<String>,
625 subdir: Option<&str>,
626 suffix: Option<&str>,
627 ) -> PathBuf {
628 let resource_hash = hash_str(resource);
629 let mut filename = if let Some(tag) = etag {
630 let etag_hash = hash_str(&tag[..]);
631 format!("{resource_hash}.{etag_hash}")
632 } else {
633 resource_hash
634 };
635
636 if let Some(suf) = suffix {
637 filename.push_str(suf);
638 }
639
640 let filepath = PathBuf::from(filename);
641
642 if let Some(subdir_path) = subdir {
643 self.dir.join(subdir_path).join(filepath)
644 } else {
645 self.dir.join(filepath)
646 }
647 }
648}
649
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Resource used by every test.
    const RESOURCE: &str = "http://localhost:5000/foo.txt";
    /// Expected hash of `RESOURCE`.
    const RESOURCE_HASH: &str = "b5696dbf866311125e26a62bef0125854dd40f010a70be9cfd23634c997c1874";
    /// Expected hash of the ETag `"abcd"`.
    const ETAG_HASH: &str = "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589";

    /// Build a cache rooted in a fresh temporary directory. The directory
    /// guard is returned as well so it outlives the test body.
    fn temp_cache() -> (tempfile::TempDir, Cache) {
        let cache_dir = tempdir().unwrap();
        let cache = Cache::builder()
            .dir(cache_dir.path().to_owned())
            .build()
            .unwrap();
        (cache_dir, cache)
    }

    #[test]
    fn test_url_to_filename_with_etag() {
        let (cache_dir, cache) = temp_cache();
        let etag = String::from("abcd");

        let actual = cache.resource_to_filepath(RESOURCE, &Some(etag), None, None);
        let expected = format!(
            "{}{}{}.{}",
            cache_dir.path().to_str().unwrap(),
            std::path::MAIN_SEPARATOR,
            RESOURCE_HASH,
            ETAG_HASH
        );

        assert_eq!(actual.to_str().unwrap(), expected);
    }

    #[test]
    fn test_url_to_filename_no_etag() {
        let (cache_dir, cache) = temp_cache();

        let actual = cache.resource_to_filepath(RESOURCE, &None, None, None);
        let expected = format!(
            "{}{}{}",
            cache_dir.path().to_str().unwrap(),
            std::path::MAIN_SEPARATOR,
            RESOURCE_HASH,
        );

        assert_eq!(actual.to_str().unwrap(), expected);
    }

    #[test]
    fn test_url_to_filename_in_subdir() {
        let (cache_dir, cache) = temp_cache();

        let actual = cache.resource_to_filepath(RESOURCE, &None, Some("target"), None);
        let expected = format!(
            "{}{}{}{}{}",
            cache_dir.path().to_str().unwrap(),
            std::path::MAIN_SEPARATOR,
            "target",
            std::path::MAIN_SEPARATOR,
            RESOURCE_HASH,
        );

        assert_eq!(actual.to_str().unwrap(), expected);
    }

    #[test]
    fn test_url_to_filename_with_suffix() {
        let (cache_dir, cache) = temp_cache();

        let actual =
            cache.resource_to_filepath(RESOURCE, &None, Some("target"), Some("-extracted"));
        let expected = format!(
            "{}{}{}{}{}-extracted",
            cache_dir.path().to_str().unwrap(),
            std::path::MAIN_SEPARATOR,
            "target",
            std::path::MAIN_SEPARATOR,
            RESOURCE_HASH,
        );

        assert_eq!(actual.to_str().unwrap(), expected);
    }
}