url_cleaner_engine/glue/
caching.rs

1//! Caching to allow for only expanding redirects the first time you encounter them.
2//!
//! A cache is an SQLite database containing one table, "cache", with 4 columns:
4//!
5//! - `subject` (`TEXT NOT NULL`): The subject of the cache entry. For example, redirects have their `subject` set to `redirect`.
6//!
7//! - `key` (`TEXT NOT NULL`): The key of the key/value pair. For example, redirects have their `key` set to the redirect URL.
8//!
9//! - `value` (`TEXT` (maybe null)): The value of the key/value pair. For example, redirects have their `value` set to the URL the starting redirect URL points to.
10//!
11//! - `duration` (`FLOAT`): The amount of time (in seconds) it took to do the thing being cached. For example, redirects have their `duration` set to about as long as it took to do the network request(s). This is used by [`CacheHandle`] to artificially delay cache reads if [`CacheHandleConfig::delay`] is [`true`] to reduce the ability of websites to tell if you've seen a certain URL before.
12//!
13//! Every pair of `subject` and `key` is unique.
14
15use std::time::Duration;
16
17use thiserror::Error;
18use serde::{Serialize, Deserialize};
19use diesel::prelude::*;
20use rand::TryRngCore;
21
22#[expect(unused_imports, reason = "Used in docs.")]
23use crate::types::*;
24use crate::util::*;
25
26pub mod path;
27pub use path::*;
28pub mod inner;
29pub use inner::*;
30pub mod outer;
31pub use outer::*;
32pub mod glue;
33pub use glue::*;
34
35/// A wrapper around a [`Cache`] for optional security features provided by [`CacheHandleConfig`].
36///
37/// Unlike [`Cache`], which is intended to be shared between [`Job`]s, [`CacheHandle`]s are intended to be made on a per-[`Job`] basis using the [`CacheHandleConfig`] appropriate for each particular [`Job`].
38///
39/// For example, a CLI program writing results to a file doesn't need to enable cache delay/unthreading, but a userscript should.
40/// # Examples
41/// ```
42/// use url_cleaner_engine::glue::*;
43/// use std::time::Duration;
44///
45/// let cache = CacheHandle {
46///     cache: &Default::default(),
47///     config: Default::default()
48/// };
49///
50/// assert_eq!(cache.read(CacheEntryKeys { subject: "subject", key: "key" }).unwrap().map(|entry| entry.value), None);
51/// cache.write(NewCacheEntry { subject: "subject", key: "key", value: None, duration: Default::default() }).unwrap();
52/// assert_eq!(cache.read(CacheEntryKeys { subject: "subject", key: "key" }).unwrap().map(|entry| entry.value), Some(None));
53/// cache.write(NewCacheEntry { subject: "subject", key: "key", value: Some("value"), duration: Default::default() }).unwrap();
54/// assert_eq!(cache.read(CacheEntryKeys { subject: "subject", key: "key" }).unwrap().map(|entry| entry.value), Some(Some("value".into())));
55/// ```
56#[derive(Debug, Clone, Copy)]
57pub struct CacheHandle<'a> {
58    /// The [`Cache`].
59    pub cache: &'a Cache,
60    /// The [`CacheHandleConfig`].
61    pub config: CacheHandleConfig
62}
63
64/// Configuration for how a [`CacheHandle`] should behave.
65#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
66pub struct CacheHandleConfig {
67    /// If [`true`], delay cache reads by about as long as the initial computation took.
68    ///
69    /// Used by URL Cleaner Site Userscript to reduce the ability of websites to tell if you have a URL cached.
70    ///
71    /// Defaults to [`false`].
72    #[serde(default, skip_serializing_if = "is_default")]
73    pub delay: bool,
74    /// If [`false`], make [`CacheHandle::read`] always return [`None`].
75    ///
76    /// Defaults to [`true`].
77    #[serde(default = "get_true", skip_serializing_if = "is_true")]
78    pub read: bool,
79    /// If [`false`], make [`CacheHandle::write`] do nothing.
80    ///
81    /// Defaults to [`true`].
82    #[serde(default = "get_true", skip_serializing_if = "is_true")]
83    pub write: bool
84}
85
86impl Default for CacheHandleConfig {
87    fn default() -> Self {
88        Self {
89            delay: false,
90            read : true,
91            write: true
92        }
93    }
94}
95
96impl CacheHandle<'_> {
97    /// Reads from the cache.
98    /// # Errors
99    /// If the call to [`InnerCache::read`] returns an error, that error is returned.
100    /// # Panics
101    /// If, somehow, [`rand::rngs::OsRng`] doesn't work, this panics when [`Self::config`]'s [`CacheHandleConfig::delay`] is [`true`].
102    pub fn read(&self, keys: CacheEntryKeys) -> Result<Option<CacheEntryValues>, ReadFromCacheError> {
103        if self.config.read {
104            let ret = self.cache.read(keys)?;
105            if self.config.delay && let Some(CacheEntryValues {duration, ..}) = ret {
106                let between_neg_1_and_1 = rand::rngs::OsRng.try_next_u32().expect("Os RNG to be available") as f32 / f32::MAX * 2.0 - 1.0;
107                std::thread::sleep(duration.mul_f32(1.0 + between_neg_1_and_1 / 8.0));
108            }
109            Ok(ret)
110        } else {
111            Ok(None)
112        }
113    }
114
115    /// Writes to the cache.
116    ///
117    /// If an entry for the `subject` and `key` already exists, overwrites it.
118    /// # Errors
119    /// If the call to [`InnerCache::write`] returns an error, that error is returned.
120    pub fn write(&self, entry: NewCacheEntry) -> Result<(), WriteToCacheError> {
121        if self.config.write {
122            self.cache.write(entry)
123        } else {
124            Ok(())
125        }
126    }
127}
128
129diesel::table! {
130    /// The table containing cache entries.
131    cache (subject, key) {
132        /// The subject of the entry.
133        subject -> Text,
134        /// The key of the entry.
135        key -> Text,
136        /// The value of the entry.
137        value -> Nullable<Text>,
138        /// The time the original computation took.
139        duration -> Float
140    }
141}
142
/// The SQL statement that creates the `cache` table when initializing a cache database.
///
/// The `UNIQUE(subject, "key") ON CONFLICT REPLACE` constraint makes inserting an entry whose `subject` and `key` already exist replace the old entry.
pub const DB_INIT_COMMAND: &str = r#"CREATE TABLE cache (
    subject TEXT NOT NULL,
    "key" TEXT NOT NULL,
    value TEXT,
    duration FLOAT NOT NULL,
    UNIQUE(subject, "key") ON CONFLICT REPLACE
)"#;
151
152/// A new entry for the cache database.
153#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Insertable)]
154#[diesel(table_name = cache)]
155pub struct NewCacheEntry<'a> {
156    /// The subject of the new entry.
157    pub subject: &'a str,
158    /// The key of the new entry.
159    pub key: &'a str,
160    /// The value of the new entry.
161    pub value: Option<&'a str>,
162    /// The time the original computation took.
163    #[diesel(serialize_as = DurationGlue)]
164    pub duration: Duration
165}
166
167/// The keys of a cache entry.
168#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Queryable, Selectable)]
169#[diesel(table_name = cache)]
170pub struct CacheEntryKeys<'a> {
171    /// The subject of the entry.
172    pub subject: &'a str,
173    /// The key of the entry.
174    pub key: &'a str,
175}
176
177/// The values of a cache entry.
178#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Queryable, Selectable)]
179#[diesel(table_name = cache)]
180pub struct CacheEntryValues {
181    /// The value of the entry.
182    pub value: Option<String>,
183    /// The time the original computation took.
184    #[diesel(deserialize_as = DurationGlue)]
185    pub duration: Duration
186}
187
188/// The enum of errors [`Cache::read`] and [`InnerCache::read`] can return.
189#[derive(Debug, Error)]
190pub enum ReadFromCacheError {
191    /// Returned when a [`diesel::result::Error`] is encountered.
192    #[error(transparent)]
193    DieselError(#[from] diesel::result::Error),
194    /// Returned when a [`ConnectCacheError`] is encountered.
195    #[error(transparent)]
196    ConnectCacheError(#[from] ConnectCacheError)
197}
198
199/// The enum of errors [`Cache::read`] and [`InnerCache::read`] can return.
200#[derive(Debug, Error)]
201pub enum WriteToCacheError {
202    /// Returned when a [`diesel::result::Error`] is encountered.
203    #[error(transparent)]
204    DieselError(#[from] diesel::result::Error),
205    /// Returned when a [`ConnectCacheError`] is encountered.
206    #[error(transparent)]
207    ConnectCacheError(#[from] ConnectCacheError)
208}
209
210/// The enum of errors that [`InnerCache::connect`] can return.
211#[derive(Debug, Error)]
212pub enum ConnectCacheError {
213    /// Returned when a [`diesel::ConnectionError`] is encountered.
214    #[error(transparent)]
215    ConnectionError(#[from] diesel::ConnectionError),
216    /// Returned when a [`std::io::Error`] is encountered.
217    #[error(transparent)]
218    IoError(#[from] std::io::Error),
219    /// Returned when a [`diesel::result::Error`] is encountered.
220    #[error(transparent)]
221    DieselError(#[from] diesel::result::Error)
222}