dir_cache/
lib.rs

1//! # dir-cache - A directory based cache
2//!
3//! Can be useful in cases when iteratively writing code that uses dynamic data that rarely changes.
4//! For example, probing some API.
5//!
6//! The cache is convenient for some use-cases, but not performant and should not be
//! used in situations where cache-performance is important.
8//!
9use crate::disk::{
10    ensure_dir, ensure_removed_file, read_all_in_dir, read_metadata_if_present,
11    read_raw_if_present, try_remove_dir,
12};
13use crate::error::{Error, Result};
14use crate::opts::{DirCacheOpts, Encoding, GenerationOpt, MemPullOpt, MemPushOpt, SyncOpt};
15use crate::path_util::{relativize, SafePathJoin};
16use crate::time::{duration_from_nano_string, unix_time_now};
17use std::borrow::Cow;
18use std::collections::{HashMap, VecDeque};
19use std::fmt::Write;
20use std::path::{Path, PathBuf};
21use std::time::Duration;
22
23mod disk;
24pub mod error;
25pub mod opts;
26mod path_util;
27mod time;
28
/// Manifest format version; written as the first line of every manifest and
/// checked against when a manifest is read back.
const MANIFEST_VERSION: u64 = 1;
/// Name of the manifest file stored inside each key's directory.
const MANIFEST_FILE: &str = "dir-cache-manifest.txt";
31
32/// A directory-based cache with a map-like interface.
33/// # Example
34/// ```
35/// use std::convert::Infallible;
36/// use std::path::Path;
37/// use dir_cache::opts::{CacheOpenOptions, DirCacheOpts, DirOpenOpt};
38/// fn use_cache() {
39///     let temp = tempfile::TempDir::with_prefix("dir-cache-doc-test").unwrap();
40///     let mut dir_cache = DirCacheOpts::default()
41///         .open(temp.path(), CacheOpenOptions::new(DirOpenOpt::OnlyIfExists, false)).unwrap();
42///
43///     // Be careful about paths used, these are joined onto the base directory and
44///     // should ideally not be dynamic
45///     let slow_value_key = Path::new("slow-key");
46///     // Will only execute the closure if the value is not present.
47///     let value = dir_cache.get_or_insert(slow_value_key, || Ok::<_, Infallible>(b"My hard to get value".to_vec())).unwrap();
48///     assert_eq!(b"My hard to get value".as_slice(), value.as_ref());
49/// }
50/// ```
51pub struct DirCache {
52    inner: DirCacheInner,
53    opts: DirCacheOpts,
54}
55
56impl DirCache {
57    /// Get this [`DirCache`]'s [`DirCacheOpts`].
58    /// To change one opt for an operation, for example.
59    #[inline]
60    #[must_use]
61    pub fn opts(&self) -> &DirCacheOpts {
62        &self.opts
63    }
64
65    /// Get the value of a key using this [`DirCache`]'s options.
66    /// Returns [`Option::None`] if the key isn't stored in the cache.
67    /// If the key is stored in the cache it will be retrieved either from memory or disk.
68    /// The value will be owned only if [`MemPullOpt::DontKeepInMemoryOnRead`] is specified
69    /// which is why the return value is a [`Cow<_>`]
70    /// # Errors
71    /// Various io-errors reading and managing disk state
72    #[inline]
73    pub fn get(&mut self, key: &Path) -> Result<Option<Cow<[u8]>>> {
74        self.inner
75            .get_opt(key, self.opts.mem_pull_opt, self.opts.generation_opt)
76    }
77
78    /// Same as [`DirCache::get`] but with opts other than what the [`DirCache`] was instantiated
79    /// with.
80    /// # Errors
81    /// Same as [`DirCache::get`]
82    #[inline]
83    pub fn get_opt(&mut self, key: &Path, opts: DirCacheOpts) -> Result<Option<Cow<[u8]>>> {
84        self.inner
85            .get_opt(key, opts.mem_pull_opt, opts.generation_opt)
86    }
87
88    /// Get a key if it exists and is valid according to [`GenerationOpt`], otherwise
89    /// use the provided `insert_with` function to generate and insert a key.
90    /// The return value is a [`Cow<_>`] which is borrowed if [`MemPushOpt::MemoryOnly`] or [`MemPushOpt::RetainAndWrite`] is
91    /// specified, or owned otherwise.
92    /// # Errors
93    /// Accepts a fallible function which can fail, in which case that function's converted
94    /// error is returned wrapped.
95    /// May also perform disk-operations based on opts, which may fail.
96    /// Additionally, will fail on paths that are not safe to use with [`DirCache`]
97    #[inline]
98    pub fn get_or_insert<
99        E: Into<Box<dyn std::error::Error>>,
100        F: FnOnce() -> core::result::Result<Vec<u8>, E>,
101    >(
102        &mut self,
103        key: &Path,
104        insert_with: F,
105    ) -> Result<Cow<[u8]>> {
106        self.inner.get_or_insert_opt(
107            key,
108            insert_with,
109            self.opts.mem_pull_opt,
110            self.opts.mem_push_opt,
111            self.opts.generation_opt,
112        )
113    }
114
115    /// Same as [`DirCache::get_or_insert`] but with [`DirCacheOpts`] different from what
116    /// this [`DirCache`] was instantiated with.
117    /// # Errors
118    /// Same as [`DirCache::get_or_insert`]
119    #[inline]
120    pub fn get_or_insert_opt<
121        E: Into<Box<dyn std::error::Error>>,
122        F: FnOnce() -> core::result::Result<Vec<u8>, E>,
123    >(
124        &mut self,
125        key: &Path,
126        insert_with: F,
127        opts: DirCacheOpts,
128    ) -> Result<Cow<[u8]>> {
129        self.inner.get_or_insert_opt(
130            key,
131            insert_with,
132            opts.mem_pull_opt,
133            opts.mem_push_opt,
134            opts.generation_opt,
135        )
136    }
137
138    /// Insert `content` as a value for the provided `key` into this [`DirCache`].
139    /// Will result in direct writes to disk if [`MemPushOpt::MemoryOnly`] isn't used.
140    /// If [`MemPushOpt::MemoryOnly`] isn't used and [`GenerationOpt`] specifies more
141    /// than one generation, a new generation will be written to disk, and previous generations
142    /// will age.
143    /// # Errors
144    /// Will error on using a key that's not safe to use with [`DirCache`].
145    /// May error on various io-errors relating to writing to disk.
146    #[inline]
147    pub fn insert(&mut self, key: &Path, content: Vec<u8>) -> Result<()> {
148        self.inner.insert_opt(
149            key,
150            content,
151            self.opts.mem_push_opt,
152            self.opts.generation_opt,
153        )
154    }
155
156    /// Insert `content` as a value for the provided `key` using the specified `opts` instead
157    /// of the [`DirCacheOpts`] that this [`DirCache`] was instantiated with, otherwise same as [`DirCache::insert`].
158    /// # Errors
159    /// Same as [`DirCache::insert`]
160    #[inline]
161    pub fn insert_opt(&mut self, key: &Path, content: Vec<u8>, opts: DirCacheOpts) -> Result<()> {
162        self.inner
163            .insert_opt(key, content, opts.mem_push_opt, opts.generation_opt)
164    }
165
166    /// Removes a key from the map, and cleans up the state left on disk.
167    /// # Errors
168    /// Various io-errors relating to probing and deleting content from disk
169    #[inline]
170    pub fn remove(&mut self, key: &Path) -> Result<bool> {
171        self.inner.remove(key)
172    }
173
174    /// Sync in-memory written content to disk, same as [`DirCache::sync`].
175    /// If [`SyncOpt::ManualSync`] and [`MemPushOpt::MemoryOnly`] are both enabled,
176    /// calling this method is the only way to flush map-state to disk.
177    /// # Errors
178    /// Various io-errors related to writing to disk
179    #[inline]
180    pub fn sync(&mut self) -> Result<()> {
181        self.inner
182            .sync_to_disk(self.opts.mem_push_opt, self.opts.generation_opt)
183    }
184
185    /// Sync in-memory written content to disk, same as [`DirCache::sync`] but with options
186    /// different to those this [`DirCache`] was instantiated with.
187    /// # Errors
188    /// Same as [`DirCache::sync`]
189    #[inline]
190    pub fn sync_opt(&mut self, opts: DirCacheOpts) -> Result<()> {
191        self.inner
192            .sync_to_disk(opts.mem_push_opt, opts.generation_opt)
193    }
194}
195
196impl Drop for DirCache {
197    fn drop(&mut self) {
198        if matches!(self.opts.sync_opt, SyncOpt::SyncOnDrop) {
199            let _ = self
200                .inner
201                .sync_to_disk(self.opts.mem_push_opt, self.opts.generation_opt);
202        }
203    }
204}
205
206struct DirCacheInner {
207    base: PathBuf,
208    store: HashMap<PathBuf, DirCacheEntry>,
209}
210
211impl DirCacheInner {
212    fn get_opt(
213        &mut self,
214        key: &Path,
215        mem_pull_opt: MemPullOpt,
216        generation_opt: GenerationOpt,
217    ) -> Result<Option<Cow<[u8]>>> {
218        // Borrow checker...
219        if !self.store.contains_key(key) {
220            return Ok(None);
221        }
222        let val = self.store.get(key).unwrap();
223        let now = unix_time_now()?;
224        let path = self.base.safe_join(key)?;
225        // To be able to remove this key, the below Cow borrow-return needs a separate borrow lasting
226        // for the remainder of this function, so here we are.
227        if val
228            .last_updated
229            .saturating_add(generation_opt.expiration.as_dur())
230            <= now
231        {
232            // The value in memory should be younger or equal to the first value on disk
233            // if it's too old, this key should be cleaned
234            try_remove_dir(&path)?;
235            self.store.remove(key);
236            return Ok(None);
237        }
238
239        if let Some(f) = val.on_disk.front() {
240            if f.age.saturating_add(generation_opt.expiration.as_dur()) <= now {
241                // No value in mem, also first value on disk is too old, clean up
242                try_remove_dir(&path)?;
243                self.store.remove(key);
244                return Ok(None);
245            }
246        } else if val.in_mem.is_none() {
247            // No value in mem, no values on disk, clean
248            try_remove_dir(&path)?;
249            self.store.remove(key);
250            return Ok(None);
251        }
252
253        let val_ref_in_mem = &mut self.store.get_mut(key).unwrap().in_mem;
254        let store = if let Some(in_mem) = val_ref_in_mem {
255            return Ok(Some(Cow::Borrowed(in_mem.content.as_slice())));
256        } else {
257            let file_path = path.safe_join("dir-cache-generation-0")?;
258            let val = read_raw_if_present(&file_path)?.ok_or_else(|| {
259                Error::ReadContent(
260                    format!("No file present on disk where expected at {file_path:?}"),
261                    None,
262                )
263            })?;
264            if matches!(mem_pull_opt, MemPullOpt::DontKeepInMemoryOnRead) {
265                return Ok(Some(Cow::Owned(val)));
266            }
267            val
268        };
269        *val_ref_in_mem = Some(InMemEntry {
270            committed: true,
271            content: store,
272        });
273        Ok(Some(Cow::Borrowed(
274            val_ref_in_mem.as_ref().unwrap().content.as_slice(),
275        )))
276    }
277
278    fn get_or_insert_opt<
279        E: Into<Box<dyn std::error::Error>>,
280        F: FnOnce() -> core::result::Result<Vec<u8>, E>,
281    >(
282        &mut self,
283        key: &Path,
284        insert_with: F,
285        mem_pull_opt: MemPullOpt,
286        mem_push_opt: MemPushOpt,
287        generation_opt: GenerationOpt,
288    ) -> Result<Cow<[u8]>> {
289        // Dumb borrow checker, going to end up here on an if let https://blog.rust-lang.org/inside-rust/2023/10/06/polonius-update.html
290        if self.store.contains_key(key) {
291            return Ok(self.get_opt(key, mem_pull_opt, generation_opt)?.unwrap());
292        }
293        let val = match insert_with() {
294            Ok(val) => val,
295            Err(e) => {
296                return Err(Error::InsertWithErr(e.into()));
297            }
298        };
299        let mut entry = DirCacheEntry::new();
300        let use_path = self.base.safe_join(key)?;
301        ensure_dir(&use_path)?;
302        entry.insert_new_data(&use_path, val, mem_push_opt, generation_opt)?;
303        self.store.insert(key.to_path_buf(), entry);
304        Ok(self.get_opt(key, mem_pull_opt, generation_opt)?.unwrap())
305    }
306
307    fn insert_opt(
308        &mut self,
309        key: &Path,
310        content: Vec<u8>,
311        mem_push_opt: MemPushOpt,
312        generation_opt: GenerationOpt,
313    ) -> Result<()> {
314        // Borrow checker strikes again
315        let path = self.base.safe_join(key)?;
316        if self.store.contains_key(key) {
317            let existing = self.store.get_mut(key).unwrap();
318            Self::run_dir_cache_entry_write(
319                existing,
320                &path,
321                content,
322                mem_push_opt,
323                generation_opt,
324            )?;
325        } else {
326            let mut dc = DirCacheEntry::new();
327            Self::run_dir_cache_entry_write(&mut dc, &path, content, mem_push_opt, generation_opt)?;
328            self.store.insert(key.to_path_buf(), dc);
329        }
330        Ok(())
331    }
332
333    fn remove(&mut self, key: &Path) -> Result<bool> {
334        let Some(_prev) = self.store.remove(key) else {
335            return Ok(false);
336        };
337        let path = self.base.safe_join(key)?;
338        try_remove_dir(&path)?;
339        Ok(true)
340    }
341
342    fn run_dir_cache_entry_write(
343        dc: &mut DirCacheEntry,
344        path: &Path,
345        content: Vec<u8>,
346        mem_push_opt: MemPushOpt,
347        generation_opt: GenerationOpt,
348    ) -> Result<()> {
349        match mem_push_opt {
350            MemPushOpt::RetainAndWrite => {
351                ensure_dir(path)?;
352                dc.generational_write(
353                    path,
354                    &content,
355                    generation_opt.old_gen_encoding,
356                    generation_opt.max_generations.get(),
357                )?;
358                dc.in_mem = Some(InMemEntry {
359                    committed: true,
360                    content,
361                });
362            }
363            MemPushOpt::MemoryOnly => {
364                dc.in_mem = Some(InMemEntry {
365                    committed: false,
366                    content,
367                });
368                dc.last_updated = unix_time_now()?;
369            }
370            MemPushOpt::PassthroughWrite => {
371                dc.in_mem = None;
372                ensure_dir(path)?;
373                dc.generational_write(
374                    path,
375                    &content,
376                    generation_opt.old_gen_encoding,
377                    generation_opt.max_generations.get(),
378                )?;
379            }
380        }
381        Ok(())
382    }
383
384    fn sync_to_disk(
385        &mut self,
386        mem_push_opt: MemPushOpt,
387        generation_opt: GenerationOpt,
388    ) -> Result<()> {
389        for (k, v) in &mut self.store {
390            let dir = self.base.safe_join(k)?;
391            ensure_dir(&dir)?;
392            let max_rem = generation_opt.max_generations.get();
393            v.dump_in_mem(
394                &dir,
395                matches!(mem_push_opt, MemPushOpt::RetainAndWrite),
396                max_rem,
397                generation_opt.old_gen_encoding,
398            )?;
399        }
400        Ok(())
401    }
402
403    fn read_from_disk(
404        base: PathBuf,
405        eager_load: bool,
406        generation_opt: GenerationOpt,
407    ) -> Result<Self> {
408        let mut check_next = VecDeque::new();
409        check_next.push_front(base.clone());
410        let mut store = HashMap::new();
411        while let Some(next) = check_next.pop_front() {
412            let entry = DirCacheEntry::read_from_dir(&next, eager_load, generation_opt)?;
413            read_all_in_dir(&next, |entry_path, entry_metadata| {
414                if entry_metadata.is_dir() {
415                    check_next.push_back(entry_path.to_path_buf());
416                }
417                Ok(())
418            })?;
419            if let Some(de) = entry {
420                let relative = relativize(&base, &next)?;
421                store.insert(relative, de);
422            }
423        }
424        Ok(Self { base, store })
425    }
426}
427
428struct DirCacheEntry {
429    in_mem: Option<InMemEntry>,
430    on_disk: VecDeque<ContentGeneration>,
431    last_updated: Duration,
432}
433
434impl DirCacheEntry {
435    #[must_use]
436    const fn new() -> Self {
437        Self {
438            in_mem: None,
439            on_disk: VecDeque::new(),
440            last_updated: Duration::ZERO,
441        }
442    }
443
444    fn insert_new_data(
445        &mut self,
446        path: &Path,
447        data: Vec<u8>,
448        mem_push_opt: MemPushOpt,
449        generation_opt: GenerationOpt,
450    ) -> Result<()> {
451        match mem_push_opt {
452            MemPushOpt::RetainAndWrite => {
453                self.generational_write(
454                    path,
455                    &data,
456                    generation_opt.old_gen_encoding,
457                    generation_opt.max_generations.get(),
458                )?;
459                self.in_mem = Some(InMemEntry {
460                    committed: false,
461                    content: data,
462                });
463            }
464            MemPushOpt::MemoryOnly => {
465                self.in_mem = Some(InMemEntry {
466                    committed: false,
467                    content: data,
468                });
469                self.last_updated = unix_time_now()?;
470            }
471            MemPushOpt::PassthroughWrite => {
472                self.generational_write(
473                    path,
474                    &data,
475                    generation_opt.old_gen_encoding,
476                    generation_opt.max_generations.get(),
477                )?;
478            }
479        }
480        Ok(())
481    }
482
483    fn generational_write(
484        &mut self,
485        base: &Path,
486        data: &[u8],
487        old_gen_encoding: Encoding,
488        max_rem: usize,
489    ) -> Result<()> {
490        while self.on_disk.len() > max_rem {
491            let file_name = format!("dir-cache-generation-{}", self.on_disk.len());
492            let file = base.safe_join(&file_name)?;
493            ensure_removed_file(&file)?;
494            self.on_disk.pop_back();
495        }
496        let mut gen_queue = VecDeque::with_capacity(max_rem);
497        for (ind, gen) in self.on_disk.drain(..).enumerate().take(max_rem - 1).rev() {
498            let n1 = base.safe_join(format!("dir-cache-generation-{ind}"))?;
499            let n2 = base.safe_join(format!("dir-cache-generation-{}", ind + 1))?;
500            if ind == 0 && !matches!(old_gen_encoding, Encoding::Plain) {
501                let content = std::fs::read(&n1).map_err(|e| {
502                    Error::ReadContent(
503                        format!("Failed to read first generation from {n1:?}"),
504                        Some(e),
505                    )
506                })?;
507                let new_content = old_gen_encoding.encode(content)?;
508                std::fs::write(&n2, new_content).map_err(|e| {
509                    Error::WriteContent(
510                        format!("Failed to write encoded content to {n2:?}"),
511                        Some(e),
512                    )
513                })?;
514                // Don't need to remove the old file, it'll be overwritten on the next loop, or in the next step
515            } else {
516                // No recoding necessary, just replace
517                std::fs::rename(&n1, &n2).map_err(|e| {
518                    Error::WriteContent(
519                        format!("Failed to migrate generations from {n1:?} to {n2:?}"),
520                        Some(e),
521                    )
522                })?;
523            }
524            gen_queue.push_front(gen);
525        }
526        let last_update = unix_time_now()?;
527        let next_gen = ContentGeneration {
528            encoding: Encoding::Plain,
529            age: last_update,
530        };
531        self.on_disk.push_front(next_gen);
532        for old in gen_queue {
533            self.on_disk.push_back(old);
534        }
535        self.last_updated = last_update;
536        let next_gen_path = base.safe_join("dir-cache-generation-0")?;
537        std::fs::write(&next_gen_path, data).map_err(|e| {
538            Error::WriteContent(
539                format!("Failed to write new generation to {next_gen_path:?}"),
540                Some(e),
541            )
542        })?;
543        self.dump_metadata(base)?;
544        Ok(())
545    }
546
547    fn read_from_dir(
548        base: &Path,
549        eager_load: bool,
550        generation_opt: GenerationOpt,
551    ) -> Result<Option<Self>> {
552        let Some((version, entries)) = Self::read_metadata(base)? else {
553            return Ok(None);
554        };
555        if version != MANIFEST_VERSION {
556            return Err(Error::ParseManifest(format!(
557                "Version mismatch, want={MANIFEST_VERSION}, got={version}"
558            )));
559        }
560        let now = unix_time_now()?;
561        let mut in_mem = None;
562        let mut on_disk = VecDeque::with_capacity(entries.len());
563        let mut last_updated = None;
564        for (ind, (age, enc)) in entries.into_iter().enumerate() {
565            if age.saturating_add(generation_opt.expiration.as_dur()) <= now {
566                ensure_removed_file(&base.safe_join(format!("dir-cache-generation-{ind}"))?)?;
567                continue;
568            }
569            if ind == 0 {
570                last_updated = Some(age);
571                if eager_load {
572                    let path = base.safe_join(format!("dir-cache-generation-{ind}"))?;
573                    let content = std::fs::read(&path).map_err(|e| {
574                        Error::ReadContent(
575                            format!("Failed to eager load content from {path:?}"),
576                            Some(e),
577                        )
578                    })?;
579                    in_mem = Some(InMemEntry {
580                        committed: true,
581                        content,
582                    });
583                }
584            }
585            on_disk.push_back(ContentGeneration { encoding: enc, age });
586        }
587        if let Some(last_updated) = last_updated {
588            Ok(Some(Self {
589                in_mem,
590                on_disk,
591                last_updated,
592            }))
593        } else {
594            Ok(None)
595        }
596    }
597
598    #[allow(clippy::type_complexity)]
599    fn read_metadata(base: &Path) -> Result<Option<(u64, VecDeque<(Duration, Encoding)>)>> {
600        let Some(content) = read_metadata_if_present(&base.safe_join(MANIFEST_FILE)?)? else {
601            return Ok(None);
602        };
603        let mut lines = content.lines();
604        let Some(first) = lines.next() else {
605            return Err(Error::ParseMetadata(format!(
606                "Manifest at {base:?} was empty"
607            )));
608        };
609        let version: u64 = first.parse().map_err(|_| {
610            Error::ParseMetadata(format!("Failed to parse version from metadata at {base:?}"))
611        })?;
612        let mut generations = VecDeque::new();
613        for line in lines {
614            let (age_nanos_raw, encoding_raw) = line.split_once(',').ok_or_else(|| {
615                Error::ParseMetadata(format!("Metadata was not comma separated at {base:?}"))
616            })?;
617            let age = duration_from_nano_string(age_nanos_raw)?;
618            let encoding = Encoding::deserialize(encoding_raw)?;
619            generations.push_front((age, encoding));
620        }
621        Ok(Some((version, generations)))
622    }
623
624    fn dump_in_mem(
625        &mut self,
626        base: &Path,
627        keep_in_mem: bool,
628        keep_generations: usize,
629        old_gen_encoding: Encoding,
630    ) -> Result<()> {
631        let maybe_in_mem = self.in_mem.take();
632        if let Some(mut in_mem) = maybe_in_mem {
633            if !in_mem.committed {
634                self.generational_write(base, &in_mem.content, old_gen_encoding, keep_generations)?;
635                if keep_in_mem {
636                    in_mem.committed = true;
637                    self.in_mem = Some(in_mem);
638                }
639                return Ok(());
640            }
641        }
642        self.dump_metadata(base)?;
643        Ok(())
644    }
645
646    fn dump_metadata(&self, base: &Path) -> Result<()> {
647        let mut metadata = format!("{MANIFEST_VERSION}\n");
648        for gen in &self.on_disk {
649            let _ = metadata.write_fmt(format_args!(
650                "{},{}\n",
651                gen.age.as_nanos(),
652                gen.encoding.serialize()
653            ));
654        }
655        let manifest_path = base.safe_join(MANIFEST_FILE)?;
656        std::fs::write(&manifest_path, metadata).map_err(|e| {
657            Error::WriteContent(
658                format!("Failed to write manifest to {manifest_path:?}"),
659                Some(e),
660            )
661        })?;
662        Ok(())
663    }
664}
665
/// A value held in memory.
struct InMemEntry {
    /// Whether `content` is also what's stored as the newest generation on disk.
    committed: bool,
    /// The raw value.
    content: Vec<u8>,
}
670
671#[derive(Debug, Clone, Copy)]
672struct ContentGeneration {
673    encoding: Encoding,
674    age: Duration,
675}