1use fs2::FileExt;
2use glob::glob;
3use log::{debug, error, info, warn};
4use rand::distributions::{Distribution, Uniform};
5use reqwest::blocking::{Client, ClientBuilder};
6use reqwest::header::ETAG;
7use std::default::Default;
8use std::env;
9use std::fs::{self, OpenOptions};
10use std::path::{Path, PathBuf};
11use std::thread;
12use std::time::{self, Duration};
13use tempfile::NamedTempFile;
14
15use crate::archives::{extract_archive, ArchiveFormat};
16use crate::utils::hash_str;
17use crate::{meta::Meta, Error, ProgressBar};
18
/// Builder used to configure and construct a [`Cache`].
///
/// Each setter consumes the builder and returns it, so calls can be chained before a final
/// call to `build()`.
#[derive(Debug)]
pub struct CacheBuilder {
    /// Settings accumulated by the setter methods and consumed by `build()`.
    config: Config,
}
24
/// Internal settings collected by [`CacheBuilder`] before a [`Cache`] is built.
#[derive(Debug)]
struct Config {
    /// Explicit cache root. When `None`, `CacheBuilder::build` falls back to the
    /// `RUST_CACHED_PATH_ROOT` environment variable and then to `cache/` under the temp dir.
    dir: Option<PathBuf>,
    /// Builder for the blocking HTTP client; turned into a `Client` in `CacheBuilder::build`.
    client_builder: ClientBuilder,
    /// Number of retries allowed after a failed ETag fetch or download.
    max_retries: u32,
    /// Upper bound, in milliseconds, on the delay slept between retries.
    max_backoff: u32,
    /// Forwarded to `Meta::new` and `Meta::is_fresh`; the unit is not visible in this file
    /// (presumably seconds — TODO confirm against `Meta`).
    freshness_lifetime: Option<u64>,
    /// When `true`, remote resources are only served from already cached versions.
    offline: bool,
    /// Progress reporter wrapped around downloads; `None` downloads without one.
    progress_bar: Option<ProgressBar>,
}
35
36impl CacheBuilder {
37 pub fn new() -> CacheBuilder {
39 CacheBuilder {
40 config: Config {
41 dir: None,
42 client_builder: ClientBuilder::new().timeout(None),
43 max_retries: 3,
44 max_backoff: 5000,
45 freshness_lifetime: None,
46 offline: false,
47 progress_bar: Some(ProgressBar::default()),
48 },
49 }
50 }
51
52 pub fn with_client_builder(client_builder: ClientBuilder) -> CacheBuilder {
54 CacheBuilder::new().client_builder(client_builder)
55 }
56
57 pub fn dir(mut self, dir: PathBuf) -> CacheBuilder {
61 self.config.dir = Some(dir);
62 self
63 }
64
65 pub fn client_builder(mut self, client_builder: ClientBuilder) -> CacheBuilder {
67 self.config.client_builder = client_builder;
68 self
69 }
70
71 pub fn timeout(mut self, timeout: Duration) -> CacheBuilder {
73 self.config.client_builder = self.config.client_builder.timeout(timeout);
74 self
75 }
76
77 pub fn connect_timeout(mut self, timeout: Duration) -> CacheBuilder {
79 self.config.client_builder = self.config.client_builder.connect_timeout(timeout);
80 self
81 }
82
83 pub fn max_retries(mut self, max_retries: u32) -> CacheBuilder {
85 self.config.max_retries = max_retries;
86 self
87 }
88
89 pub fn max_backoff(mut self, max_backoff: u32) -> CacheBuilder {
91 self.config.max_backoff = max_backoff;
92 self
93 }
94
95 pub fn freshness_lifetime(mut self, freshness_lifetime: u64) -> CacheBuilder {
98 self.config.freshness_lifetime = Some(freshness_lifetime);
99 self
100 }
101
102 pub fn offline(mut self, offline: bool) -> CacheBuilder {
109 self.config.offline = offline;
110 self
111 }
112
113 pub fn progress_bar(mut self, progress_bar: Option<ProgressBar>) -> CacheBuilder {
117 self.config.progress_bar = progress_bar;
118 self
119 }
120
121 pub fn build(self) -> Result<Cache, Error> {
123 let dir = self.config.dir.unwrap_or_else(|| {
124 if let Some(dir_str) = env::var_os("RUST_CACHED_PATH_ROOT") {
125 PathBuf::from(dir_str)
126 } else {
127 env::temp_dir().join("cache/")
128 }
129 });
130 let http_client = self.config.client_builder.build()?;
131 fs::create_dir_all(&dir)?;
132 Ok(Cache {
133 dir,
134 http_client,
135 max_retries: self.config.max_retries,
136 max_backoff: self.config.max_backoff,
137 freshness_lifetime: self.config.freshness_lifetime,
138 offline: self.config.offline,
139 progress_bar: self.config.progress_bar,
140 })
141 }
142}
143
144impl Default for CacheBuilder {
145 fn default() -> Self {
146 Self::new()
147 }
148}
149
/// Per-call options for `Cache::cached_path_with_options`.
///
/// The default value uses no subdirectory and does not extract the resource.
#[derive(Debug, Clone, Default)]
pub struct Options {
    /// Optional subdirectory, relative to the cache root, in which to cache the resource.
    pub subdir: Option<String>,
    /// When `true`, the resource is treated as an archive and the path of the extracted
    /// directory is returned instead of the path of the resource itself.
    pub extract: bool,
}

impl Options {
    /// Build options from an optional subdirectory and an extraction flag.
    pub fn new(subdir: Option<&str>, extract: bool) -> Self {
        Self {
            subdir: subdir.map(String::from),
            extract,
        }
    }

    /// Set the cache subdirectory, returning the updated options.
    pub fn subdir(mut self, subdir: &str) -> Self {
        self.subdir = Some(subdir.into());
        self
    }

    /// Request extraction of the resource, returning the updated options.
    pub fn extract(mut self) -> Self {
        self.extract = true;
        self
    }
}
179
/// Fetches and caches remote resources, and optionally extracts archives, under a root
/// directory. Construct one with `Cache::new()` or through `Cache::builder()`.
#[derive(Debug, Clone)]
pub struct Cache {
    /// Root directory under which cached files (and optional subdirectories) are stored.
    pub dir: PathBuf,
    /// Number of retries allowed after a failed ETag fetch or download.
    max_retries: u32,
    /// Upper bound, in milliseconds, on the delay slept between retries.
    max_backoff: u32,
    /// Forwarded to `Meta::new` and `Meta::is_fresh`; the unit is not visible in this file
    /// (presumably seconds — TODO confirm against `Meta`).
    freshness_lifetime: Option<u64>,
    /// When `true`, remote resources are only served from already cached versions.
    offline: bool,
    /// Progress reporter wrapped around downloads; `None` downloads without one.
    progress_bar: Option<ProgressBar>,
    /// Blocking HTTP client used for the `HEAD` (ETag) and `GET` (download) requests.
    http_client: Client,
}
204
205impl Cache {
206 pub fn new() -> Result<Self, Error> {
208 Cache::builder().build()
209 }
210
211 pub fn builder() -> CacheBuilder {
213 CacheBuilder::new()
214 }
215
216 pub fn cached_path(&self, resource: &str) -> Result<PathBuf, Error> {
221 self.cached_path_with_options(resource, &Options::default())
222 }
223
224 pub fn cached_path_with_options(
256 &self,
257 resource: &str,
258 options: &Options,
259 ) -> Result<PathBuf, Error> {
260 let cached_path: PathBuf;
261 let mut extraction_dir: Option<PathBuf> = None;
262
263 if !resource.starts_with("http") {
264 info!("Treating {} as local file", resource);
267 cached_path = PathBuf::from(resource);
268
269 if !cached_path.is_file() {
270 return Err(Error::ResourceNotFound(String::from(resource)));
271 }
272
273 if options.extract {
274 let resource_last_modified = fs::metadata(resource)?
279 .modified()
280 .ok()
281 .and_then(|sys_time| sys_time.elapsed().ok())
282 .map(|duration| format!("{}", duration.as_secs()));
283 extraction_dir = Some(self.resource_to_filepath(
284 resource,
285 &resource_last_modified,
286 options.subdir.as_deref(),
287 Some("-extracted"),
288 ));
289 }
290 } else {
291 let meta = self.fetch_remote_resource(resource, options.subdir.as_deref())?;
293
294 if options.extract {
296 extraction_dir = Some(meta.get_extraction_path());
297 }
298
299 cached_path = meta.resource_path;
300 }
301
302 if let Some(dirpath) = extraction_dir {
303 debug!("Treating {} as archive", resource);
305
306 fs::create_dir_all(dirpath.parent().unwrap())?;
307
308 debug!("Acquiring lock on extraction directory for {}", resource);
311 let lock_path = format!("{}.lock", dirpath.to_str().unwrap());
312 let filelock = OpenOptions::new()
313 .read(true)
314 .write(true)
315 .create(true)
316 .truncate(true)
317 .open(lock_path)?;
318 filelock.lock_exclusive()?;
319 debug!("Lock on extraction directory acquired for {}", resource);
320
321 if !dirpath.is_dir() {
322 info!("Extracting {} to {:?}", resource, dirpath);
323 let format = ArchiveFormat::parse_from_extension(resource)?;
324 extract_archive(&cached_path, &dirpath, &format)?;
325 }
326
327 fs2::FileExt::unlock(&filelock)?;
328 debug!("Lock released on extraction directory for {}", resource);
330
331 Ok(dirpath)
332 } else {
333 Ok(cached_path)
334 }
335 }
336
337 #[deprecated(
353 since = "0.4.4",
354 note = "Please use Cache::cached_path_with_options() instead"
355 )]
356 pub fn cached_path_in_subdir(
357 &self,
358 resource: &str,
359 subdir: Option<&str>,
360 ) -> Result<PathBuf, Error> {
361 let options = Options::new(subdir, false);
362 self.cached_path_with_options(resource, &options)
363 }
364
365 fn fetch_remote_resource(&self, resource: &str, subdir: Option<&str>) -> Result<Meta, Error> {
366 let url =
368 reqwest::Url::parse(resource).map_err(|_| Error::InvalidUrl(String::from(resource)))?;
369
370 if let Some(subdir_path) = subdir {
372 fs::create_dir_all(self.dir.join(subdir_path))?;
373 } else {
374 fs::create_dir_all(&self.dir)?;
375 };
376
377 let versions = self.find_existing(resource, subdir); if self.offline {
381 if !versions.is_empty() {
382 info!("Found existing cached version of {}", resource);
383 return Ok(versions[0].clone());
384 } else {
385 error!("Offline mode is enabled but no cached versions of resource exist.");
386 return Err(Error::NoCachedVersions(String::from(resource)));
387 }
388 } else if !versions.is_empty() && versions[0].is_fresh(self.freshness_lifetime) {
389 info!("Latest cached version of {} is still fresh", resource);
391 return Ok(versions[0].clone());
392 }
393
394 let etag = self.try_get_etag(resource, &url)?;
398 let path = self.resource_to_filepath(resource, &etag, subdir, None);
399
400 debug!("Acquiring lock for cache of {}", resource);
403 let lock_path = format!("{}.lock", path.to_str().unwrap());
404 let filelock = OpenOptions::new()
405 .read(true)
406 .write(true)
407 .create(true)
408 .truncate(true)
409 .open(lock_path)?;
410 filelock.lock_exclusive()?;
411 debug!("Lock acquired for {}", resource);
412
413 if path.exists() {
414 info!("Cached version of {} is up-to-date", resource);
418 fs2::FileExt::unlock(&filelock)?;
420 return Meta::from_cache(&path);
421 }
422
423 let meta = self.try_download_resource(resource, &url, &path, &etag)?;
425
426 info!("New version of {} cached", resource);
427
428 fs2::FileExt::unlock(&filelock)?;
430 debug!("Lock released for {}", resource);
431
432 Ok(meta)
433 }
434
435 fn find_existing(&self, resource: &str, subdir: Option<&str>) -> Vec<Meta> {
437 let mut existing_meta: Vec<Meta> = vec![];
438 let glob_string = format!(
439 "{}*.meta",
440 self.resource_to_filepath(resource, &None, subdir, None)
441 .to_str()
442 .unwrap(),
443 );
444 for meta_path in glob(&glob_string).unwrap().filter_map(Result::ok) {
445 if let Ok(meta) = Meta::from_path(&meta_path) {
446 existing_meta.push(meta);
447 }
448 }
449 existing_meta
450 .sort_unstable_by(|a, b| b.creation_time.partial_cmp(&a.creation_time).unwrap());
451 existing_meta
452 }
453
454 fn get_retry_delay(&self, retries: u32) -> u32 {
455 let between = Uniform::from(0..1000);
456 let mut rng = rand::thread_rng();
457 std::cmp::min(
458 2u32.pow(retries - 1) * 1000 + between.sample(&mut rng),
459 self.max_backoff,
460 )
461 }
462
463 fn try_download_resource(
464 &self,
465 resource: &str,
466 url: &reqwest::Url,
467 path: &Path,
468 etag: &Option<String>,
469 ) -> Result<Meta, Error> {
470 let mut retries: u32 = 0;
471 loop {
472 match self.download_resource(resource, url, path, etag) {
473 Ok(meta) => {
474 return Ok(meta);
475 }
476 Err(err) => {
477 if retries >= self.max_retries {
478 error!("Max retries exceeded for {}", resource);
479 return Err(err);
480 }
481 if !err.is_retriable() {
482 error!("Download failed for {} with fatal error, {}", resource, err);
483 return Err(err);
484 }
485 retries += 1;
486 let retry_delay = self.get_retry_delay(retries);
487 warn!(
488 "Download failed for {}: {}\nRetrying in {} milliseconds...",
489 resource, err, retry_delay
490 );
491 thread::sleep(time::Duration::from_millis(u64::from(retry_delay)));
492 }
493 }
494 }
495 }
496
497 fn download_resource(
498 &self,
499 resource: &str,
500 url: &reqwest::Url,
501 path: &Path,
502 etag: &Option<String>,
503 ) -> Result<Meta, Error> {
504 debug!("Attempting connection to {}", url);
505
506 let mut response = self
507 .http_client
508 .get(url.clone())
509 .send()?
510 .error_for_status()?;
511
512 debug!("Opened connection to {}", url);
513
514 let tempfile = NamedTempFile::new_in(path.parent().unwrap())?;
518 let mut tempfile_write_handle = OpenOptions::new().write(true).open(tempfile.path())?;
519
520 info!("Starting download of {}", url);
521
522 let bytes = if let Some(progress_bar) = &self.progress_bar {
523 let mut download_wrapper = progress_bar.wrap_download(
524 resource,
525 response.content_length(),
526 tempfile_write_handle,
527 );
528 let bytes = response.copy_to(&mut download_wrapper)?;
529 download_wrapper.finish();
530 bytes
531 } else {
532 response.copy_to(&mut tempfile_write_handle)?
533 };
534
535 info!("Downloaded {} bytes", bytes);
536 debug!("Writing meta file");
537
538 let meta = Meta::new(
539 String::from(resource),
540 path.into(),
541 etag.clone(),
542 self.freshness_lifetime,
543 );
544 meta.to_file()?;
545
546 debug!("Renaming temp file to cache location for {}", url);
547
548 fs::rename(tempfile.path(), path)?;
549
550 Ok(meta)
551 }
552
553 fn try_get_etag(&self, resource: &str, url: &reqwest::Url) -> Result<Option<String>, Error> {
554 let mut retries: u32 = 0;
555 loop {
556 match self.get_etag(url) {
557 Ok(etag) => return Ok(etag),
558 Err(err) => {
559 if retries >= self.max_retries {
560 error!("Max retries exceeded for {}", resource);
561 return Err(err);
562 }
563 if !err.is_retriable() {
564 error!("ETAG fetch for {} failed with fatal error", resource);
565 return Err(err);
566 }
567 retries += 1;
568 let retry_delay = self.get_retry_delay(retries);
569 warn!(
570 "ETAG fetch failed for {}, retrying in {} milliseconds...",
571 resource, retry_delay
572 );
573 thread::sleep(time::Duration::from_millis(u64::from(retry_delay)));
574 }
575 }
576 }
577 }
578
579 fn get_etag(&self, url: &reqwest::Url) -> Result<Option<String>, Error> {
580 debug!("Fetching ETAG for {}", url);
581 let response = self
582 .http_client
583 .head(url.clone())
584 .send()?
585 .error_for_status()?;
586 if let Some(etag) = response.headers().get(ETAG) {
587 if let Ok(s) = etag.to_str() {
588 Ok(Some(s.into()))
589 } else {
590 debug!("No ETAG for {}", url);
591 Ok(None)
592 }
593 } else {
594 Ok(None)
595 }
596 }
597
598 fn resource_to_filepath(
599 &self,
600 resource: &str,
601 etag: &Option<String>,
602 subdir: Option<&str>,
603 suffix: Option<&str>,
604 ) -> PathBuf {
605 let resource_hash = hash_str(resource);
606 let mut filename = if let Some(tag) = etag {
607 let etag_hash = hash_str(&tag[..]);
608 format!("{}.{}", resource_hash, etag_hash)
609 } else {
610 resource_hash
611 };
612
613 if let Some(suf) = suffix {
614 filename.push_str(suf);
615 }
616
617 let filepath = PathBuf::from(filename);
618
619 if let Some(subdir_path) = subdir {
620 self.dir.join(subdir_path).join(filepath)
621 } else {
622 self.dir.join(filepath)
623 }
624 }
625}
626
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Resource used by every test in this module.
    const RESOURCE: &str = "http://localhost:5000/foo.txt";
    /// Expected hash of `RESOURCE`.
    const RESOURCE_HASH: &str = "b5696dbf866311125e26a62bef0125854dd40f010a70be9cfd23634c997c1874";
    /// Expected hash of the ETag `"abcd"`.
    const ETAG_HASH: &str = "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589";

    /// Build a cache rooted at `root`.
    fn cache_rooted_at(root: &std::path::Path) -> Cache {
        Cache::builder().dir(root.to_owned()).build().unwrap()
    }

    /// Render `root` followed by each of `parts`, every part preceded by the platform's path
    /// separator.
    fn expected_path(root: &std::path::Path, parts: &[&str]) -> String {
        let mut rendered = root.to_str().unwrap().to_owned();
        for part in parts {
            rendered.push(std::path::MAIN_SEPARATOR);
            rendered.push_str(part);
        }
        rendered
    }

    #[test]
    fn test_url_to_filename_with_etag() {
        let cache_dir = tempdir().unwrap();
        let cache = cache_rooted_at(cache_dir.path());
        let etag = Some(String::from("abcd"));

        let actual = cache.resource_to_filepath(RESOURCE, &etag, None, None);
        let file_name = format!("{}.{}", RESOURCE_HASH, ETAG_HASH);

        assert_eq!(
            actual.to_str().unwrap(),
            expected_path(cache_dir.path(), &[file_name.as_str()])
        );
    }

    #[test]
    fn test_url_to_filename_no_etag() {
        let cache_dir = tempdir().unwrap();
        let cache = cache_rooted_at(cache_dir.path());

        let actual = cache.resource_to_filepath(RESOURCE, &None, None, None);

        assert_eq!(
            actual.to_str().unwrap(),
            expected_path(cache_dir.path(), &[RESOURCE_HASH])
        );
    }

    #[test]
    fn test_url_to_filename_in_subdir() {
        let cache_dir = tempdir().unwrap();
        let cache = cache_rooted_at(cache_dir.path());

        let actual = cache.resource_to_filepath(RESOURCE, &None, Some("target"), None);

        assert_eq!(
            actual.to_str().unwrap(),
            expected_path(cache_dir.path(), &["target", RESOURCE_HASH])
        );
    }

    #[test]
    fn test_url_to_filename_with_suffix() {
        let cache_dir = tempdir().unwrap();
        let cache = cache_rooted_at(cache_dir.path());

        let actual =
            cache.resource_to_filepath(RESOURCE, &None, Some("target"), Some("-extracted"));
        let file_name = format!("{}-extracted", RESOURCE_HASH);

        assert_eq!(
            actual.to_str().unwrap(),
            expected_path(cache_dir.path(), &["target", file_name.as_str()])
        );
    }
}