// web_scrape/cache/web_cache.rs

use std::hash::{DefaultHasher, Hash, Hasher};

use enc::base_64::Base64Encoder;
use enc::hex::HexEncoder;
use enc::StringEncoder;
use file_storage::{FilePath, FolderPath};
use web_url::WebUrl;

use crate::Error;

/// Responsible for caching data from the web.
///
/// Every URL is mapped to a single file located below `root`; the private `file` method
/// computes that location.
#[derive(Clone, Debug)]
pub struct WebCache {
    /// The folder under which all cache files are placed.
    root: FolderPath,
    /// Encodes a URL into the name of its cache file.
    ///
    /// NOTE(review): constructed with `b'-'`, `b'_'` and `None` — presumably the URL-safe
    /// base-64 alphabet without padding; confirm against the `enc` crate.
    base_64_encoder: Base64Encoder,
}

impl From<FolderPath> for WebCache {
    fn from(root: FolderPath) -> Self {
        Self {
            root,
            base_64_encoder: Base64Encoder::new(b'-', b'_', None).unwrap(),
        }
    }
}

impl WebCache {
    //! Read

    /// Looks up the cached bytes for `url`.
    ///
    /// The result is whatever `read_as_vec_if_exists` yields for the cache file of `url`
    /// (presumably `None` when nothing has been cached for it yet).
    ///
    /// # Errors
    /// Fails if the cache file path cannot be built or if reading the file fails.
    pub fn read(&self, url: &WebUrl) -> Result<Option<Vec<u8>>, Error> {
        let cached: Option<Vec<u8>> = self.file(url)?.read_as_vec_if_exists()?;
        Ok(cached)
    }
}

impl WebCache {
    //! Write

    /// Replaces whatever is cached for `url` with `data`.
    ///
    /// # Errors
    /// Fails if the cache file path cannot be built, or if deleting or writing the file fails.
    pub fn write(&self, url: &WebUrl, data: &[u8]) -> Result<(), Error> {
        let target: FilePath = self.file(url)?;
        // Delete first: the write below only creates the file when it does not exist yet.
        target.delete()?;
        // The value reported by the write is intentionally discarded.
        target.write_data_if_not_exists(data)?;
        Ok(())
    }
}

impl WebCache {
    //! Clear

    /// Removes the cache entry for `url` by deleting its cache file.
    ///
    /// # Errors
    /// Fails if the cache file path cannot be built or if the delete fails.
    pub fn clear(&self, url: &WebUrl) -> Result<(), Error> {
        self.file(url)?.delete()?;
        Ok(())
    }
}

impl WebCache {
    //! Files

    /// Gets the folder char for the cache `key`. (a single lowercase hex char)
    fn folder_char(&self, key: &str) -> char {
        let mut hasher: DefaultHasher = DefaultHasher::default();
        key.hash(&mut hasher);
        let hash: u64 = hasher.finish();
        let hash: u64 = (hash >> 32) ^ hash;
        let hash: u64 = (hash >> 16) ^ hash;
        let hash: u64 = (hash >> 8) ^ hash;
        let hash: u64 = (hash >> 4) ^ hash;
        let hash: u8 = hash as u8;
        let (_, hex) = HexEncoder::LOWER.encode_chars(hash);
        hex
    }

    /// Gets the cache file for the `method` and `url`.
    fn file(&self, url: &WebUrl) -> Result<FilePath, Error> {
        let folder_char: char = self.folder_char(url.as_str());
        let base_64: String = self
            .base_64_encoder
            .encode_as_string(url.as_str().as_bytes())
            .map_err(|e| Error::Other(format!("error encoding URL: {}", e)))?;
        let extension: &str = ".web-cache";
        let extra: usize = folder_char.len_utf8()
            + self.root.path().file_separator().len_utf8()
            + base_64.len()
            + extension.len();
        self.root
            .clone_with_extra_capacity(extra)
            .with_appended_char(folder_char)
            .make_folder()
            .with_appended(base_64.as_str())
            .make_file(extension)
            .ok_or_else(|| Error::Other("the letter 'e' is a file-separator".to_string()))
    }
}