Skip to main content

powerpack_cache/
lib.rs

1//! ⚡ Cache management for your Alfred workflow
2//!
3//! This crate provides a simple cache management system for your Alfred
4//! workflow. Data is cached in the workflow's cache directory and is updated
5//! asynchronously.
6//!
7//! The cache supports arbitrary data types for each key as long as they can be
8//! serialized and deserialized from JSON.
9//!
10//! # Concepts
11//!
12//! - `key`: a unique identifier for a piece of data stored in the cache.
13//!
14//! - `ttl`: the Time To Live (TTL) for the data in the cache. If the data in
15//!   the cache is older than this then it is considered "expired".
16//!
17//! - `checksum`: an optional checksum for a particular cache `key`. You can use
18//!   this to bust the cache for some other reason than the data being expired.
19//!
20//! - `update_fn`: a function that is called to update the cache for a `key`.
21//!   This is typically some operation that is expensive and/or slow and you do
22//!   not want to block the Alfred workflow. This function is called
23//!   asynchronously to update the cache. If the cache is already being updated
24//!   by another process, then the function is not called.
25//!
26//! The following behaviour is determined by the [policy](QueryPolicy) of the
27//! query:
28//! - When to call a provided `update_fn`.
29//! - When to return bad, expired, or checksum mismatched data.
30//!
31//! # Usage
32//!
33//! Use a [`Builder`] to construct a new [`Cache`].
34//!
35//! ```no_run
36//! # mod powerpack { pub extern crate powerpack_cache as cache; } // mock re-export
37//! use std::time::Duration;
38//! use powerpack::cache;
39//!
40//! let cache = cache::Builder::new().ttl(Duration::from_secs(60 * 60)).build();
41//! ```
42//!
43//! Then the only function to call is [`.query(..)`][Cache::query] which will
44//! fetch the cached value and/or detach a process to update it.
45//! ```no_run
46//! # use powerpack_cache as cache;
47//! # let mut cache = cache::Builder::new().build();
48//! #
49//! let expensive_fn = |_| {
50//!     // perform some expensive operation, like fetching
51//!     // something over the internet
52//! #   Ok::<String, std::convert::Infallible>(String::from(""))
53//! };
54//!
55//! let q = cache::Query::new("unique_key").update_fn(expensive_fn);
56//! let data = cache.query(q)?;
57//! # Ok::<(), cache::QueryError>(())
58//! ```
59//!
60
61mod query;
62
63use std::error::Error as StdError;
64use std::fs;
65use std::fs::TryLockError;
66use std::io;
67use std::path::{Path, PathBuf};
68use std::thread;
69use std::time::{Duration, Instant, SystemTime};
70
71use flagset::FlagSet;
72use serde::{Deserialize, Serialize};
73use serde_json as json;
74use thiserror::Error;
75
76use powerpack_detach as detach;
77use powerpack_env as env;
78
79pub use crate::query::{Query, QueryError, QueryPolicy};
80
/// The cache file name, the version indicates the format of the data.
///
/// Each key's entry is stored in a file with this name inside that key's own
/// sub-directory of the cache directory.
const DATA: &str = "v2.json";
83
/// The function type for the update function.
///
/// The function receives the previous cache entry (or `None` when no cache
/// file existed yet) and returns the new data to store for the key.
pub type UpdateFn<'f, T, E> = Box<dyn FnOnce(Option<PrevEntry<T>>) -> Result<T, E> + 'f>;
86
/// Raised when constructing a new cache.
///
/// Returned by [`Builder::try_build`].
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum BuildError {
    /// Raised when the home directory cannot be determined, which is only
    /// needed when no explicit cache directory was configured.
    #[error("home directory not found")]
    NoHomeDir,
}
95
/// Raised when updating data in the cache
///
/// This type is private to the crate: update failures are logged by
/// [`Cache::query`] rather than returned to the caller.
#[derive(Debug, Error)]
#[non_exhaustive]
enum UpdateError {
    /// Raised when an I/O error occurs (creating the directory, locking,
    /// writing the temporary file, or renaming it into place)
    #[error("io error")]
    Io(#[from] io::Error),

    /// Raised when a JSON serialization error occurs
    #[error("serialization error")]
    Serialize(#[from] json::Error),

    /// Raised when an error occurs in the update function; the boxed value is
    /// the error the update function returned
    #[error("update fn failed: {0}")]
    UpdateFn(#[from] Box<dyn StdError + Send + Sync + 'static>),
}
112
/// A builder for a cache.
///
/// Configure it with the chained setter methods and finish with
/// [`Builder::build`] or [`Builder::try_build`].
#[derive(Debug, Clone)]
pub struct Builder {
    /// Explicit cache directory; `None` means the default location is
    /// resolved when the cache is built.
    directory: Option<PathBuf>,
    /// Policy applied to queries that do not specify their own.
    query_policy: FlagSet<QueryPolicy>,
    /// TTL applied to queries that do not specify their own.
    ttl: Duration,
    /// Initial poll duration applied to queries that do not specify their
    /// own; `None` means no polling.
    initial_poll: Option<Duration>,
}
121
/// Manage a cache of data on disk.
///
/// Created using a [`Builder`].
#[derive(Debug)]
pub struct Cache {
    /// Root directory of the cache; each key gets its own sub-directory.
    directory: PathBuf,
    /// Policy applied to queries that do not specify their own.
    query_policy: FlagSet<QueryPolicy>,
    /// TTL applied to queries that do not specify their own.
    ttl: Duration,
    /// Initial poll duration applied to queries that do not specify their
    /// own; `None` means no polling.
    initial_poll: Option<Duration>,
}
132
/// The previous cache entry and metadata.
///
/// Passed to the update function to allow more flexible update strategies.
#[derive(Debug)]
#[non_exhaustive]
pub struct PrevEntry<T> {
    /// The actual cache entry, or an error if it failed to deserialize.
    pub entry: Result<Entry<T>, json::Error>,

    /// The checksum supplied by the query, compared against the stored
    /// entry's checksum to detect a mismatch.
    query_checksum: Option<String>,
    /// The TTL in effect for the query, used to decide whether the stored
    /// entry is expired.
    query_ttl: Duration,
}
145
/// The data and metadata stored in the cache.
///
/// This is the structure that is serialized to and deserialized from the
/// JSON cache file.
///
// Breaking changes need to bump the version in the cache file name.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[non_exhaustive]
pub struct Entry<T> {
    /// The time the cache was last modified (pre update), i.e. sampled just
    /// before the update function was called
    pub pre_update_time: SystemTime,
    /// The time the cache was last modified (post update), i.e. sampled just
    /// after the update function returned; expiry is measured from this time
    pub post_update_time: SystemTime,
    /// The checksum specified when the data was stored in the cache
    pub checksum: Option<String>,
    /// The data stored in the cache
    pub data: T,
}
161
162impl Default for Builder {
163    #[inline]
164    fn default() -> Self {
165        Self::new()
166    }
167}
168
169impl Builder {
170    /// Returns a new cache builder.
171    #[inline]
172    pub fn new() -> Self {
173        Builder {
174            directory: None,
175            query_policy: QueryPolicy::default_set(),
176            ttl: Duration::from_secs(60),
177            initial_poll: None,
178        }
179    }
180
181    /// Set the cache directory.
182    ///
183    /// Defaults to `{alfred_workflow_cache}/cache`
184    ///
185    /// These should be set by Alfred, but if not:
186    /// - `{alfred_workflow_cache}` defaults to `~/Library/Caches/com.runningwithcrayons.Alfred/Workflow Data/{alfred_workflow_bundleid}`
187    /// - `{alfred_workflow_bundleid}` defaults to `powerpack`
188    ///
189    /// See [`powerpack_env::workflow_cache_or_default`] for more information.
190    #[inline]
191    pub fn directory(mut self, directory: impl Into<PathBuf>) -> Self {
192        self.directory = Some(directory.into());
193        self
194    }
195
196    /// Set the query policy for the cache.
197    ///
198    /// This is used to determine things like when updates should occur and
199    /// stale data is allowed to be returned.
200    pub fn policy(mut self, query_policy: impl Into<FlagSet<QueryPolicy>>) -> Self {
201        self.query_policy = query_policy.into();
202        self
203    }
204
205    /// Set the default Time To Live (TTL) for the data in the cache.
206    ///
207    /// This is used if the query does not specify a TTL.
208    ///
209    /// If the data in the cache is older than this then the cache will be
210    /// automatically refreshed. Stale data will be returned in the meantime.
211    ///
212    /// Defaults to 60 seconds.
213    #[inline]
214    pub fn ttl(mut self, ttl: Duration) -> Self {
215        self.ttl = ttl;
216        self
217    }
218
219    /// Set the initial poll duration.
220    ///
221    /// This is used if the query does not specify an initial poll duration.
222    ///
223    /// This is the duration to wait for the cache to be populated on the first
224    /// call. If the cache is not populated within this duration, a miss error
225    /// will be raised.
226    ///
227    /// Defaults to not polling at all. This means the initial call to
228    /// [`.query()`](Cache::query) will return immediately with
229    /// [`Err(QueryError::Miss)`][QueryError::Miss].
230    #[inline]
231    pub fn initial_poll(mut self, initial_poll: Duration) -> Self {
232        self.initial_poll = Some(initial_poll);
233        self
234    }
235
236    /// Try build the cache.
237    ///
238    /// This can fail if the user's home directory cannot be determined.
239    pub fn try_build(self) -> Result<Cache, BuildError> {
240        let Self {
241            directory,
242            query_policy,
243            ttl,
244            initial_poll,
245        } = self;
246
247        let directory = match directory {
248            Some(directory) => directory,
249            None => env::try_workflow_cache_or_default()
250                .ok_or(BuildError::NoHomeDir)?
251                .join("cache"),
252        };
253
254        Ok(Cache {
255            directory,
256            query_policy,
257            ttl,
258            initial_poll,
259        })
260    }
261
262    /// Build the cache.
263    ///
264    /// # Panics
265    ///
266    /// If the user's home directory cannot be determined.
267    #[track_caller]
268    #[inline]
269    pub fn build(self) -> Cache {
270        self.try_build().expect("failed to build cache")
271    }
272}
273
274impl<T> PrevEntry<T> {
275    fn build(buf: &[u8], query_checksum: Option<String>, query_ttl: Duration) -> Self
276    where
277        T: for<'de> Deserialize<'de>,
278    {
279        let entry: Result<Entry<T>, _> = json::from_slice(buf);
280        Self {
281            entry,
282            query_checksum,
283            query_ttl,
284        }
285    }
286
287    fn should_update(&self, policy: FlagSet<QueryPolicy>) -> bool {
288        policy.contains(QueryPolicy::UpdateAlways)
289            || self.is_bad_data() && policy.contains(QueryPolicy::UpdateBadData)
290            || self.is_checksum_mismatch() && policy.contains(QueryPolicy::UpdateChecksumMismatch)
291            || self.is_expired() && policy.contains(QueryPolicy::UpdateExpired)
292    }
293
294    #[rustfmt::skip]
295    fn should_return(&self, policy: FlagSet<QueryPolicy>) -> bool {
296        policy.contains(QueryPolicy::ReturnAlways) || {
297            (!self.is_bad_data() || policy.contains(QueryPolicy::ReturnBadDataErr))
298            && (!self.is_checksum_mismatch() || policy.contains(QueryPolicy::ReturnChecksumMismatch))
299            && (!self.is_expired() || policy.contains(QueryPolicy::ReturnExpired))
300        }
301    }
302
303    fn into_data(self, policy: FlagSet<QueryPolicy>) -> Result<T, QueryError> {
304        if self.should_return(policy) {
305            Ok(self.entry.map(|c| c.data)?)
306        } else {
307            Err(QueryError::Miss)
308        }
309    }
310
311    /// Returns true if the cache entry failed to deserialize
312    #[inline]
313    pub fn is_bad_data(&self) -> bool {
314        self.entry.is_err()
315    }
316
317    /// Returns true if the cache entry has a mismatch with the queried checksum
318    #[inline]
319    pub fn is_checksum_mismatch(&self) -> bool {
320        self.entry.as_ref().is_ok_and(|entry| {
321            self.query_checksum.is_some() && entry.checksum.as_ref() != self.query_checksum.as_ref()
322        })
323    }
324
325    /// Returns true if cache entry is expired based on the queried TTL
326    #[inline]
327    pub fn is_expired(&self) -> bool {
328        self.entry.as_ref().is_ok_and(|entry| {
329            entry
330                .post_update_time
331                .elapsed()
332                .map_or(true, |d| d > self.query_ttl)
333        })
334    }
335}
336
337impl Cache {
338    /// Fetches the cache value according to the [`Query`].
339    pub fn query<'a, T, E>(&self, query: Query<'a, T, E>) -> Result<T, QueryError>
340    where
341        T: Serialize + for<'de> Deserialize<'de>,
342        E: Into<Box<dyn std::error::Error + Send + Sync>>,
343    {
344        let Query {
345            key,
346            checksum,
347            policy,
348            ttl,
349            initial_poll,
350            update_fn,
351        } = query;
352
353        let directory = self.directory.join(key);
354        let path = directory.join(DATA);
355
356        let policy = policy.unwrap_or(self.query_policy);
357        let ttl = ttl.unwrap_or(self.ttl);
358        let initial_poll = initial_poll.or(self.initial_poll).map(|d| {
359            let sleep = (d / 5).min(Duration::from_millis(100)).min(d);
360            (d, sleep)
361        });
362
363        let update_fn = update_fn.map(|f| {
364            let checksum = checksum.clone();
365            |prev_data| match update(&directory, &path, checksum, prev_data, f) {
366                Ok(true) => log::debug!("cache: updated {key}"),
367                Ok(false) => log::debug!("cache: another process updated {key}"),
368                Err(err) => log::error!(
369                    "cache: failed to update {key}: {}",
370                    detach::format_err(&err)
371                ),
372            }
373        });
374
375        match fs::read(&path) {
376            Ok(buf) => {
377                let prev = PrevEntry::build(&buf, checksum, ttl);
378                match update_fn {
379                    Some(update_fn) if prev.should_update(policy) => {
380                        detach::spawn_with(prev, |prev| update_fn(Some(prev)))?
381                    }
382                    _ => prev,
383                }
384                .into_data(policy)
385            }
386
387            Err(err) if err.kind() == io::ErrorKind::NotFound => {
388                if let Some(update_fn) = update_fn {
389                    detach::spawn(|| update_fn(None))?;
390                }
391
392                // wait for the cache to be populated
393                if let Some((poll_duration, poll_sleep)) = initial_poll {
394                    let start = Instant::now();
395                    while Instant::now().duration_since(start) < poll_duration {
396                        thread::sleep(poll_sleep);
397                        match fs::read(&path) {
398                            Ok(buf) => {
399                                return PrevEntry::build(&buf, checksum, ttl).into_data(policy);
400                            }
401                            Err(err) if err.kind() == io::ErrorKind::NotFound => continue,
402                            Err(err) => return Err(err.into()),
403                        }
404                    }
405                }
406
407                Err(QueryError::Miss)
408            }
409
410            Err(err) => Err(err.into()),
411        }
412    }
413}
414
415fn update<'d, 'f, T, E>(
416    directory: &Path,
417    path: &Path,
418    checksum: Option<String>,
419    prev_entry: Option<PrevEntry<T>>,
420    update_fn: UpdateFn<'f, T, E>,
421) -> Result<bool, UpdateError>
422where
423    T: Serialize + for<'de> Deserialize<'de>,
424    E: Into<Box<dyn std::error::Error + Send + Sync>>,
425{
426    fs::create_dir_all(directory)?;
427    let tmp = path.with_extension("tmp");
428
429    match fs::File::open(directory)?.try_lock() {
430        Ok(()) => {
431            let pre_update_time = SystemTime::now();
432            let data = update_fn(prev_entry).map_err(Into::into)?;
433            let post_update_time = SystemTime::now();
434            let file = fs::File::create(&tmp)?;
435            json::to_writer(
436                &file,
437                &Entry {
438                    pre_update_time,
439                    post_update_time,
440                    checksum,
441                    data,
442                },
443            )?;
444            fs::rename(tmp, path)?;
445            Ok(true)
446        }
447        Err(TryLockError::Error(err)) => Err(err.into()),
448        Err(TryLockError::WouldBlock) => Ok(false),
449    }
450}