Skip to main content

web_scrape/cache/
web_cache.rs

1use crate::Error;
2use clerr::{Code, Report};
3use colored::Colorize;
4use enc::base_64::Base64Encoder;
5use enc::hex::HexEncoder;
6use enc::StringEncoder;
7use file_storage::{FilePath, FolderPath};
8use std::hash::{DefaultHasher, Hash, Hasher};
9use web_url::WebUrl;
10
11/// Responsible for caching data from the web.
12#[derive(Clone, Debug)]
13pub struct WebCache {
14    local: Option<FolderPath>,
15    remote: Option<FolderPath>,
16    base_64_encoder: Base64Encoder,
17    extension: String,
18}
19
20impl WebCache {
21    //! Construction
22
23    /// Creates a new web cache.
24    pub fn new(local: Option<FolderPath>, remote: Option<FolderPath>) -> Self {
25        Self {
26            local,
27            remote,
28            base_64_encoder: Base64Encoder::default(),
29            extension: ".web-cache".to_string(),
30        }
31    }
32}
33
34impl WebCache {
35    //! Read
36
37    /// Reads the optional cached data for the `url`.
38    pub fn read(&self, url: &WebUrl) -> Result<Option<Vec<u8>>, Error> {
39        if let Some(local) = &self.local {
40            let file: FilePath = self.file_for_root(url, local)?;
41            if let Some(data) = file.read_as_vec_if_exists()? {
42                return Ok(Some(data));
43            }
44        }
45
46        if let Some(remote) = &self.remote {
47            let file: FilePath = self.file_for_root(url, remote)?;
48            if let Some(data) = file.read_as_vec_if_exists()? {
49                return Ok(Some(data));
50            }
51        }
52
53        Ok(None)
54    }
55}
56
57impl WebCache {
58    //! Write
59
60    /// Overwrites the cached `data` for the `url`.
61    pub fn write(&self, url: &WebUrl, data: &[u8]) -> Result<(), Error> {
62        if let Some(local) = &self.local {
63            self.write_to_root(url, data, local)?;
64        }
65        if let Some(remote) = &self.remote {
66            self.write_to_root(url, data, remote)?;
67        }
68        Ok(())
69    }
70
71    fn write_to_root(&self, url: &WebUrl, data: &[u8], root: &FolderPath) -> Result<(), Error> {
72        let file: FilePath = self.file_for_root(url, root)?;
73        file.delete()?;
74        Ok(file.write_data_if_not_exists(data).map(|_| ())?)
75    }
76}
77
78impl WebCache {
79    //! Files
80
81    /// Gets the file for the `url` given the `root` folder.
82    fn file_for_root(&self, url: &WebUrl, root: &FolderPath) -> Result<FilePath, Error> {
83        let folder_char: char = self.folder_char(url.as_str());
84        let base_64: String = self
85            .base_64_encoder
86            .encode_as_string(url.as_str().as_bytes())
87            .map_err(|e| {
88                Error::Other(
89                    Report::new(Code::error(
90                        "CACHE_BASE_64",
91                        format!("error converting the URL to base-64: {}", url),
92                    ))
93                    .with_entry(vec![e.to_string().normal()]),
94                )
95            })?;
96        let extra: usize = folder_char.len_utf8()
97            + root.path().file_separator().len_utf8()
98            + base_64.len()
99            + self.extension.len();
100        root.clone_with_extra_capacity(extra)
101            .with_appended_char(folder_char)
102            .make_folder()
103            .with_appended(base_64.as_str())
104            .make_file(self.extension.as_str())
105            .map_err(|path| {
106                Error::Other(Report::new(Code::error(
107                    "CACHE_FILE_EXTENSION",
108                    format!("the file extension makes the path a folder: {}", path),
109                )))
110            })
111    }
112
113    /// Gets the folder char for the cache `key`. (a single lowercase hex char)
114    fn folder_char(&self, key: &str) -> char {
115        let mut hasher: DefaultHasher = DefaultHasher::default();
116        key.hash(&mut hasher);
117        let hash: u64 = hasher.finish();
118        let hash: u64 = (hash >> 32) ^ hash;
119        let hash: u64 = (hash >> 16) ^ hash;
120        let hash: u64 = (hash >> 8) ^ hash;
121        let hash: u64 = (hash >> 4) ^ hash;
122        let hash: u8 = hash as u8;
123        let (_, hex) = HexEncoder::LOWER.encode_chars(hash);
124        hex
125    }
126}