url_cleaner_engine/glue/caching.rs
1//! Caching to allow for only expanding redirects the first time you encounter them.
2//!
3//! A cache is a Sqlite database containing one table, "cache", with 4 columns:
4//!
5//! - `subject` (`TEXT NOT NULL`): The subject of the cache entry. For example, redirects have their `subject` set to `redirect`.
6//!
7//! - `key` (`TEXT NOT NULL`): The key of the key/value pair. For example, redirects have their `key` set to the redirect URL.
8//!
9//! - `value` (`TEXT` (maybe null)): The value of the key/value pair. For example, redirects have their `value` set to the URL the starting redirect URL points to.
10//!
//! - `duration` (`FLOAT NOT NULL`): The amount of time (in seconds) it took to do the thing being cached. For example, redirects have their `duration` set to about as long as it took to do the network request(s). This is used by [`CacheHandle`] to artificially delay cache reads if [`CacheHandleConfig::delay`] is [`true`] to reduce the ability of websites to tell if you've seen a certain URL before.
12//!
13//! Every pair of `subject` and `key` is unique.
14
15use std::time::Duration;
16
17use thiserror::Error;
18use serde::{Serialize, Deserialize};
19use diesel::prelude::*;
20use rand::TryRngCore;
21
22#[expect(unused_imports, reason = "Used in docs.")]
23use crate::types::*;
24use crate::util::*;
25
26pub mod path;
27pub use path::*;
28pub mod inner;
29pub use inner::*;
30pub mod outer;
31pub use outer::*;
32pub mod glue;
33pub use glue::*;
34
/// A wrapper around a [`Cache`] for optional security features provided by [`CacheHandleConfig`].
///
/// Unlike [`Cache`], which is intended to be shared between [`Job`]s, [`CacheHandle`]s are intended to be made on a per-[`Job`] basis using the [`CacheHandleConfig`] appropriate for each particular [`Job`].
///
/// For example, a CLI program writing results to a file doesn't need to enable cache delay/unthreading, but a userscript should.
///
/// The handle only borrows the [`Cache`] and is [`Copy`], so passing it around by value is cheap.
///
/// # Examples
/// ```
/// use url_cleaner_engine::glue::*;
/// use std::time::Duration;
///
/// let cache = CacheHandle {
///     cache: &Default::default(),
///     config: Default::default()
/// };
///
/// assert_eq!(cache.read(CacheEntryKeys { subject: "subject", key: "key" }).unwrap().map(|entry| entry.value), None);
/// cache.write(NewCacheEntry { subject: "subject", key: "key", value: None, duration: Default::default() }).unwrap();
/// assert_eq!(cache.read(CacheEntryKeys { subject: "subject", key: "key" }).unwrap().map(|entry| entry.value), Some(None));
/// cache.write(NewCacheEntry { subject: "subject", key: "key", value: Some("value"), duration: Default::default() }).unwrap();
/// assert_eq!(cache.read(CacheEntryKeys { subject: "subject", key: "key" }).unwrap().map(|entry| entry.value), Some(Some("value".into())));
/// ```
#[derive(Debug, Clone, Copy)]
pub struct CacheHandle<'a> {
    /// The [`Cache`] that reads and writes are forwarded to.
    pub cache: &'a Cache,
    /// The [`CacheHandleConfig`] deciding whether reads/writes happen and whether reads are delayed.
    pub config: CacheHandleConfig
}
63
/// Configuration for how a [`CacheHandle`] should behave.
///
/// The [`Default`] value enables both reads and writes and disables the read delay.
// NOTE(review): `is_default`, `get_true` and `is_true` come from `crate::util` (not visible here);
// presumably they test for the `Default` value, return `true`, and test for `true` respectively,
// which would keep the serde defaults in line with the `Default` impl. Confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheHandleConfig {
    /// If [`true`], delay cache reads by about as long as the initial computation took.
    ///
    /// Used by URL Cleaner Site Userscript to reduce the ability of websites to tell if you have a URL cached.
    ///
    /// Defaults to [`false`].
    #[serde(default, skip_serializing_if = "is_default")]
    pub delay: bool,
    /// If [`false`], make [`CacheHandle::read`] always return [`None`].
    ///
    /// Defaults to [`true`].
    #[serde(default = "get_true", skip_serializing_if = "is_true")]
    pub read: bool,
    /// If [`false`], make [`CacheHandle::write`] do nothing.
    ///
    /// Defaults to [`true`].
    #[serde(default = "get_true", skip_serializing_if = "is_true")]
    pub write: bool
}
85
86impl Default for CacheHandleConfig {
87 fn default() -> Self {
88 Self {
89 delay: false,
90 read : true,
91 write: true
92 }
93 }
94}
95
96impl CacheHandle<'_> {
97 /// Reads from the cache.
98 /// # Errors
99 /// If the call to [`InnerCache::read`] returns an error, that error is returned.
100 /// # Panics
101 /// If, somehow, [`rand::rngs::OsRng`] doesn't work, this panics when [`Self::config`]'s [`CacheHandleConfig::delay`] is [`true`].
102 pub fn read(&self, keys: CacheEntryKeys) -> Result<Option<CacheEntryValues>, ReadFromCacheError> {
103 if self.config.read {
104 let ret = self.cache.read(keys)?;
105 if self.config.delay && let Some(CacheEntryValues {duration, ..}) = ret {
106 let between_neg_1_and_1 = rand::rngs::OsRng.try_next_u32().expect("Os RNG to be available") as f32 / f32::MAX * 2.0 - 1.0;
107 std::thread::sleep(duration.mul_f32(1.0 + between_neg_1_and_1 / 8.0));
108 }
109 Ok(ret)
110 } else {
111 Ok(None)
112 }
113 }
114
115 /// Writes to the cache.
116 ///
117 /// If an entry for the `subject` and `key` already exists, overwrites it.
118 /// # Errors
119 /// If the call to [`InnerCache::write`] returns an error, that error is returned.
120 pub fn write(&self, entry: NewCacheEntry) -> Result<(), WriteToCacheError> {
121 if self.config.write {
122 self.cache.write(entry)
123 } else {
124 Ok(())
125 }
126 }
127}
128
// Diesel schema for the `cache` table. The `(subject, key)` list after the table name names the
// columns Diesel treats as the (composite) primary key.
diesel::table! {
    /// The table containing cache entries.
    cache (subject, key) {
        /// The subject of the entry.
        subject -> Text,
        /// The key of the entry.
        key -> Text,
        /// The value of the entry, which may be null.
        value -> Nullable<Text>,
        /// The time (in seconds) the original computation took.
        duration -> Float
    }
}
142
/// The SQLite command to initialize the cache database.
///
/// Creates the `cache` table with its four columns. The `UNIQUE(subject, "key") ON CONFLICT REPLACE`
/// constraint is what lets a write for an already-present `subject`/`key` pair replace the old row
/// instead of failing. `"key"` is quoted because `KEY` is an SQL keyword.
pub const DB_INIT_COMMAND: &str = r#"CREATE TABLE cache (
 subject TEXT NOT NULL,
 "key" TEXT NOT NULL,
 value TEXT,
 duration FLOAT NOT NULL,
 UNIQUE(subject, "key") ON CONFLICT REPLACE
)"#;
151
/// A new entry for the cache database.
///
/// Borrows all of its strings, so building one does not allocate. Passed to [`CacheHandle::write`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Insertable)]
#[diesel(table_name = cache)]
pub struct NewCacheEntry<'a> {
    /// The subject of the new entry.
    pub subject: &'a str,
    /// The key of the new entry.
    pub key: &'a str,
    /// The value of the new entry. [`None`] is stored as a null `value`.
    pub value: Option<&'a str>,
    /// The time the original computation took.
    // Inserted through `DurationGlue` because the `duration` column is a `Float`, not a `Duration`.
    #[diesel(serialize_as = DurationGlue)]
    pub duration: Duration
}
166
/// The keys of a cache entry.
///
/// The `subject`/`key` pair identifies a single entry; it is what [`CacheHandle::read`] looks up.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Queryable, Selectable)]
#[diesel(table_name = cache)]
pub struct CacheEntryKeys<'a> {
    /// The subject of the entry.
    pub subject: &'a str,
    /// The key of the entry.
    pub key: &'a str,
}
176
/// The values of a cache entry.
///
/// This is what a successful, non-empty [`CacheHandle::read`] returns.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Queryable, Selectable)]
#[diesel(table_name = cache)]
pub struct CacheEntryValues {
    /// The value of the entry. [`None`] means the entry exists but its `value` is null.
    pub value: Option<String>,
    /// The time the original computation took.
    // Loaded through `DurationGlue` because the `duration` column is a `Float`, not a `Duration`.
    #[diesel(deserialize_as = DurationGlue)]
    pub duration: Duration
}
187
/// The enum of errors [`Cache::read`] and [`InnerCache::read`] can return.
///
/// Also the error type of [`CacheHandle::read`], which forwards the underlying error unchanged.
#[derive(Debug, Error)]
pub enum ReadFromCacheError {
    /// Returned when a [`diesel::result::Error`] is encountered.
    #[error(transparent)]
    DieselError(#[from] diesel::result::Error),
    /// Returned when a [`ConnectCacheError`] is encountered.
    #[error(transparent)]
    ConnectCacheError(#[from] ConnectCacheError)
}
198
/// The enum of errors [`Cache::write`] and [`InnerCache::write`] can return.
///
/// Also the error type of [`CacheHandle::write`], which forwards the underlying error unchanged.
#[derive(Debug, Error)]
pub enum WriteToCacheError {
    /// Returned when a [`diesel::result::Error`] is encountered.
    #[error(transparent)]
    DieselError(#[from] diesel::result::Error),
    /// Returned when a [`ConnectCacheError`] is encountered.
    #[error(transparent)]
    ConnectCacheError(#[from] ConnectCacheError)
}
209
/// The enum of errors that [`InnerCache::connect`] can return.
///
/// [`ReadFromCacheError`] and [`WriteToCacheError`] each wrap this type in their `ConnectCacheError` variant.
#[derive(Debug, Error)]
pub enum ConnectCacheError {
    /// Returned when a [`diesel::ConnectionError`] is encountered.
    #[error(transparent)]
    ConnectionError(#[from] diesel::ConnectionError),
    /// Returned when a [`std::io::Error`] is encountered.
    #[error(transparent)]
    IoError(#[from] std::io::Error),
    /// Returned when a [`diesel::result::Error`] is encountered.
    #[error(transparent)]
    DieselError(#[from] diesel::result::Error)
}