Skip to main content

web_scrape/cache/
web_cache.rs

1use crate::Error;
2use clerr::{Code, Report};
3use colored::Colorize;
4use enc::base_64::Base64Encoder;
5use enc::hex::HexEncoder;
6use enc::StringEncoder;
7use file_storage::{FilePath, FolderPath};
8use std::hash::{DefaultHasher, Hash, Hasher};
9use web_url::WebUrl;
10
11/// Responsible for caching web data.
12#[derive(Clone, Debug)]
13pub struct WebCache {
14    local: Option<FolderPath>,
15    remote: Option<FolderPath>,
16    encoder: Base64Encoder,
17    extension: String,
18}
19
20impl WebCache {
21    //! Construction
22
23    /// Creates a new web cache.
24    pub fn new(local: Option<FolderPath>, remote: Option<FolderPath>) -> Self {
25        Self {
26            local,
27            remote,
28            encoder: Base64Encoder::url_safe_encoder(),
29            extension: ".web-cache".to_string(),
30        }
31    }
32}
33
34impl WebCache {
35    //! Read
36
37    /// Reads the optional cached data for the `url`.
38    pub fn read(&self, url: &WebUrl) -> Result<Option<Vec<u8>>, Error> {
39        if let Some(local) = &self.local {
40            let file: FilePath = self.file_for_root(url, local)?;
41            if let Some(data) = file.read_as_vec_if_exists()? {
42                return Ok(Some(data));
43            }
44        }
45
46        if let Some(remote) = &self.remote {
47            let file: FilePath = self.file_for_root(url, remote)?;
48            if let Some(data) = file.read_as_vec_if_exists()? {
49                return Ok(Some(data));
50            }
51        }
52
53        Ok(None)
54    }
55}
56
57impl WebCache {
58    //! Write
59
60    /// Overwrites the cached `data` for the `url`.
61    pub fn write(&self, url: &WebUrl, data: &[u8]) -> Result<(), Error> {
62        if let Some(local) = &self.local {
63            self.write_to_root(url, data, local)?;
64        }
65        if let Some(remote) = &self.remote {
66            self.write_to_root(url, data, remote)?;
67        }
68        Ok(())
69    }
70
71    /// Writes the `data` from the `url` to the `root` folder.
72    fn write_to_root(&self, url: &WebUrl, data: &[u8], root: &FolderPath) -> Result<(), Error> {
73        let file: FilePath = self.file_for_root(url, root)?;
74        file.delete()?;
75        Ok(file.write_data_if_not_exists(data).map(|_| ())?)
76    }
77}
78
79impl WebCache {
80    //! Files
81
82    /// Gets the file for the `url` given the `root` folder.
83    fn file_for_root(&self, url: &WebUrl, root: &FolderPath) -> Result<FilePath, Error> {
84        let folder_char: char = self.folder_char(url.as_str());
85        let base_64: String = self
86            .encoder
87            .encode_as_string(url.as_str().as_bytes())
88            .map_err(|e| {
89                Error::Other(
90                    Report::new(Code::error(
91                        "WEB_CACHE_BASE_64",
92                        format!("error converting the URL to base-64: {}", url),
93                    ))
94                    .with_entry(vec![e.to_string().normal()]),
95                )
96            })?;
97        let extra: usize = folder_char.len_utf8()
98            + root.path().file_separator().len_utf8()
99            + base_64.len()
100            + self.extension.len();
101        root.clone_with_extra_capacity(extra)
102            .with_appended_char(folder_char)
103            .make_folder()
104            .with_appended(base_64.as_str())
105            .make_file(self.extension.as_str())
106            .map_err(|path| {
107                Error::Other(Report::new(Code::error(
108                    "WEB_CACHE_INVALID_EXTENSION",
109                    format!("the file extension makes the path a folder: {}", path),
110                )))
111            })
112    }
113
114    /// Gets the folder char for the cache `key`. (a single lowercase hex char)
115    fn folder_char(&self, key: &str) -> char {
116        let mut hasher: DefaultHasher = DefaultHasher::default();
117        key.hash(&mut hasher);
118        let hash: u64 = hasher.finish();
119        let hash: u64 = (hash >> 32) ^ hash;
120        let hash: u64 = (hash >> 16) ^ hash;
121        let hash: u64 = (hash >> 8) ^ hash;
122        let hash: u64 = (hash >> 4) ^ hash;
123        let hash: u8 = hash as u8;
124        let (_, hex) = HexEncoder::LOWER.encode_chars(hash);
125        hex
126    }
127}