lsm_tree/
cache.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::table::block::Header;
6use crate::table::{Block, BlockOffset};
7use crate::{GlobalTableId, UserValue};
8use quick_cache::sync::Cache as QuickCache;
9use quick_cache::Weighter;
10
11const TAG_BLOCK: u8 = 0;
12const TAG_BLOB: u8 = 1;
13
14#[derive(Clone)]
15enum Item {
16    Block(Block),
17    Blob(UserValue),
18}
19
20#[derive(Eq, std::hash::Hash, PartialEq)]
21struct CacheKey(u8, u64, u64, u64);
22
23impl From<(u8, u64, u64, u64)> for CacheKey {
24    fn from((tag, root_id, table_id, offset): (u8, u64, u64, u64)) -> Self {
25        Self(tag, root_id, table_id, offset)
26    }
27}
28
29#[derive(Clone)]
30struct BlockWeighter;
31
32impl Weighter<CacheKey, Item> for BlockWeighter {
33    fn weight(&self, _: &CacheKey, item: &Item) -> u64 {
34        use Item::{Blob, Block};
35
36        match item {
37            Block(b) => (Header::serialized_len() as u64) + u64::from(b.header.uncompressed_length),
38            Blob(b) => b.len() as u64,
39        }
40    }
41}
42
43/// Cache, in which blocks or blobs are cached in-memory
44/// after being retrieved from disk
45///
46/// This speeds up consecutive queries to nearby data, improving
47/// read performance for hot data.
48///
49/// # Examples
50///
51/// Sharing cache between multiple trees
52///
53/// ```
54/// # use lsm_tree::{Tree, Config, Cache};
55/// # use std::sync::Arc;
56/// #
57/// // Provide 64 MB of cache capacity
58/// let cache = Arc::new(Cache::with_capacity_bytes(64 * 1_000 * 1_000));
59///
60/// # let folder = tempfile::tempdir()?;
61/// let tree1 = Config::new(folder, Default::default(), Default::default()).use_cache(cache.clone()).open()?;
62/// # let folder = tempfile::tempdir()?;
63/// let tree2 = Config::new(folder, Default::default(), Default::default()).use_cache(cache.clone()).open()?;
64/// #
65/// # Ok::<(), lsm_tree::Error>(())
66/// ```
67pub struct Cache {
68    // NOTE: rustc_hash performed best: https://fjall-rs.github.io/post/fjall-2-1
69    /// Concurrent cache implementation
70    data: QuickCache<CacheKey, Item, BlockWeighter, rustc_hash::FxBuildHasher>,
71
72    /// Capacity in bytes
73    capacity: u64,
74}
75
76impl Cache {
77    /// Creates a new block cache with roughly `n` bytes of capacity.
78    #[must_use]
79    pub fn with_capacity_bytes(bytes: u64) -> Self {
80        use quick_cache::sync::DefaultLifecycle;
81
82        #[expect(clippy::expect_used, reason = "nothing we can do if it fails")]
83        let opts = quick_cache::OptionsBuilder::new()
84            .weight_capacity(bytes)
85            .hot_allocation(0.8)
86            .estimated_items_capacity(10_000)
87            .build()
88            .expect("cache options should be valid");
89
90        let quick_cache = QuickCache::with_options(
91            opts,
92            BlockWeighter,
93            rustc_hash::FxBuildHasher,
94            DefaultLifecycle::default(),
95        );
96
97        Self {
98            data: quick_cache,
99            capacity: bytes,
100        }
101    }
102
103    /// Returns the amount of cached bytes.
104    #[must_use]
105    pub fn size(&self) -> u64 {
106        self.data.weight()
107    }
108
109    /// Returns the cache capacity in bytes.
110    #[must_use]
111    pub fn capacity(&self) -> u64 {
112        self.capacity
113    }
114
115    #[doc(hidden)]
116    #[must_use]
117    pub fn get_block(&self, id: GlobalTableId, offset: BlockOffset) -> Option<Block> {
118        let key: CacheKey = (TAG_BLOCK, id.tree_id(), id.table_id(), *offset).into();
119
120        Some(match self.data.get(&key)? {
121            Item::Block(block) => block,
122            Item::Blob(_) => unreachable!("invalid cache item"),
123        })
124    }
125
126    #[doc(hidden)]
127    pub fn insert_block(&self, id: GlobalTableId, offset: BlockOffset, block: Block) {
128        self.data.insert(
129            (TAG_BLOCK, id.tree_id(), id.table_id(), *offset).into(),
130            Item::Block(block),
131        );
132    }
133
134    #[doc(hidden)]
135    pub fn insert_blob(
136        &self,
137        vlog_id: crate::TreeId,
138        vhandle: &crate::vlog::ValueHandle,
139        value: UserValue,
140    ) {
141        self.data.insert(
142            (TAG_BLOB, vlog_id, vhandle.blob_file_id, vhandle.offset).into(),
143            Item::Blob(value),
144        );
145    }
146
147    #[doc(hidden)]
148    #[must_use]
149    pub fn get_blob(
150        &self,
151        vlog_id: crate::TreeId,
152        vhandle: &crate::vlog::ValueHandle,
153    ) -> Option<UserValue> {
154        let key: CacheKey = (TAG_BLOB, vlog_id, vhandle.blob_file_id, vhandle.offset).into();
155
156        Some(match self.data.get(&key)? {
157            Item::Blob(blob) => blob,
158            Item::Block(_) => unreachable!("invalid cache item"),
159        })
160    }
161}