// roughdb/cache/mod.rs

1//    Licensed under the Apache License, Version 2.0 (the "License");
2//    you may not use this file except in compliance with the License.
3//    You may obtain a copy of the License at
4//
5//        http://www.apache.org/licenses/LICENSE-2.0
6//
7//    Unless required by applicable law or agreed to in writing, software
8//    distributed under the License is distributed on an "AS IS" BASIS,
9//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10//    See the License for the specific language governing permissions and
11//    limitations under the License.
12
13//! Block cache — an LRU cache of decompressed SSTable data blocks.
14//!
15//! LevelDB uses a generic sharded LRU cache with custom deleters and opaque
16//! handles (`util/cache.cc`).  In Rust, `Arc<Block>` replaces the handle +
17//! deleter pattern: dropping the cache's copy is enough; any `BlockIter` that
18//! already cloned the `Arc<Vec<u8>>` remains valid independently.
19//!
20//! The key is `(cache_id, block_offset)`.  Each `Table` gets a unique `cache_id`
21//! from [`BlockCache::new_id`] so blocks from different files never collide even
22//! when file numbers are reused after compaction.
23//!
24//! Default capacity: 8 MiB (matching LevelDB's `Options::block_cache` default).
25
26use crate::table::block::Block;
27use std::collections::{HashMap, VecDeque};
28use std::sync::atomic::{AtomicU64, Ordering};
29use std::sync::{Arc, Mutex};
30
/// Default block cache capacity in bytes (8 MiB, matching LevelDB's
/// `Options::block_cache` default noted in the module docs above).
pub const DEFAULT_BLOCK_CACHE_CAPACITY: usize = 8 * 1024 * 1024;
33
/// Mutex-protected state of the cache: byte accounting plus the two LRU
/// bookkeeping structures.
///
/// Invariant maintained by `get`/`insert`: `order` and `map` always hold
/// exactly the same key set (each key appears in `order` at most once), and
/// `usage` equals the sum of `data().len()` over all cached blocks.  `usage`
/// may transiently exceed `capacity` when a single block is larger than the
/// whole cache — `insert` still admits it after evicting everything else.
struct CacheInner {
  /// Maximum total bytes of block data held in the cache.
  capacity: usize,
  /// Current total bytes charged (sum of `block.data().len()` for each entry).
  usage: usize,
  /// LRU order: front = least-recently used, back = most-recently used.
  order: VecDeque<(u64, u64)>,
  /// Maps `(cache_id, block_offset) → Block`.
  map: HashMap<(u64, u64), Block>,
}
44
45impl CacheInner {
46  fn get(&mut self, cache_id: u64, offset: u64) -> Option<Block> {
47    let key = (cache_id, offset);
48    let block = self.map.get(&key)?.clone();
49    // Promote to MRU.
50    if let Some(pos) = self.order.iter().position(|&k| k == key) {
51      self.order.remove(pos);
52    }
53    self.order.push_back(key);
54    Some(block)
55  }
56
57  fn insert(&mut self, cache_id: u64, offset: u64, block: Block) {
58    let key = (cache_id, offset);
59    let charge = block.data().len();
60
61    // If already present, replace and update usage.
62    if let Some(old) = self.map.remove(&key) {
63      self.usage = self.usage.saturating_sub(old.data().len());
64      if let Some(pos) = self.order.iter().position(|&k| k == key) {
65        self.order.remove(pos);
66      }
67    }
68
69    // Evict LRU entries until we have room (or only one entry remains).
70    while self.usage + charge > self.capacity && !self.order.is_empty() {
71      if let Some(evict_key) = self.order.pop_front() {
72        if let Some(evicted) = self.map.remove(&evict_key) {
73          self.usage = self.usage.saturating_sub(evicted.data().len());
74        }
75      }
76    }
77
78    self.order.push_back(key);
79    self.map.insert(key, block);
80    self.usage += charge;
81  }
82}
83
/// An LRU cache of decompressed SSTable data blocks, keyed by `(cache_id, block_offset)`.
///
/// `BlockCache` is cheaply cloneable (`Arc`-backed) and safe to share across threads.
///
/// See `include/leveldb/cache.h` and `util/cache.cc`.
#[derive(Clone)]
pub struct BlockCache {
  /// Monotonic source of per-`Table` IDs handed out by `new_id`; shared by
  /// all clones so IDs stay unique across the whole cache.
  next_id: Arc<AtomicU64>,
  /// Shared LRU state; every clone of this `BlockCache` points at the same
  /// inner cache, so inserts via one handle are visible through the others.
  inner: Arc<Mutex<CacheInner>>,
}
94
95impl BlockCache {
96  /// Create a new block cache with the given byte capacity.
97  pub fn new(capacity: usize) -> Self {
98    BlockCache {
99      next_id: Arc::new(AtomicU64::new(1)),
100      inner: Arc::new(Mutex::new(CacheInner {
101        capacity,
102        usage: 0,
103        order: VecDeque::new(),
104        map: HashMap::new(),
105      })),
106    }
107  }
108
109  /// Allocate a unique cache ID for a new `Table`.
110  ///
111  /// Each `Table` calls this once at open time so its blocks never collide with
112  /// another table's blocks even when file numbers are reused after compaction.
113  pub(crate) fn new_id(&self) -> u64 {
114    self.next_id.fetch_add(1, Ordering::Relaxed)
115  }
116
117  /// Look up a block by `(cache_id, block_offset)`.
118  ///
119  /// Returns a clone of the cached `Block` on hit (cheap — `Block` wraps `Arc<Vec<u8>>`).
120  pub(crate) fn get(&self, cache_id: u64, offset: u64) -> Option<Block> {
121    self.inner.lock().unwrap().get(cache_id, offset)
122  }
123
124  /// Insert `block` at `(cache_id, block_offset)`, evicting LRU entries as needed.
125  pub(crate) fn insert(&self, cache_id: u64, offset: u64, block: Block) {
126    self.inner.lock().unwrap().insert(cache_id, offset, block);
127  }
128}
129
#[cfg(test)]
mod tests {
  use super::*;

  /// Build a zero-filled block charging exactly `nbytes` against the cache.
  fn make_block(nbytes: usize) -> Block {
    Block::new(vec![0u8; nbytes])
  }

  #[test]
  fn hit_after_insert() {
    let cache = BlockCache::new(1024);
    let table_id = cache.new_id();
    cache.insert(table_id, 0, make_block(100));
    assert!(cache.get(table_id, 0).is_some());
  }

  #[test]
  fn miss_on_absent_key() {
    let cache = BlockCache::new(1024);
    assert!(cache.get(1, 0).is_none());
  }

  #[test]
  fn evicts_lru_when_over_capacity() {
    // Two 100-byte blocks fill a 200-byte cache exactly; a third insert must
    // push out the oldest entry.
    let cache = BlockCache::new(200);
    let table_id = cache.new_id();
    cache.insert(table_id, 0, make_block(100)); // A
    cache.insert(table_id, 100, make_block(100)); // B — cache now full
    cache.insert(table_id, 200, make_block(100)); // C — forces A out
    assert!(cache.get(table_id, 0).is_none(), "A should have been evicted");
    assert!(cache.get(table_id, 100).is_some(), "B should still be present");
    assert!(cache.get(table_id, 200).is_some(), "C should be present");
  }

  #[test]
  fn lru_promotes_on_get() {
    // A hit refreshes recency: after touching A, inserting C evicts B instead.
    let cache = BlockCache::new(200);
    let table_id = cache.new_id();
    cache.insert(table_id, 0, make_block(100)); // A (oldest)
    cache.insert(table_id, 100, make_block(100)); // B (newest); cache full
    cache.get(table_id, 0); // A becomes MRU, leaving B as the eviction victim
    cache.insert(table_id, 200, make_block(100)); // C — forces B out
    assert!(
      cache.get(table_id, 0).is_some(),
      "A should still be present (was promoted)"
    );
    assert!(cache.get(table_id, 100).is_none(), "B should have been evicted");
    assert!(cache.get(table_id, 200).is_some(), "C should be present");
  }

  #[test]
  fn unique_ids_per_new_id_call() {
    let cache = BlockCache::new(1024);
    let first = cache.new_id();
    let second = cache.new_id();
    assert_ne!(first, second);
  }

  #[test]
  fn clone_shares_state() {
    let original = BlockCache::new(1024);
    let shared = original.clone();
    let table_id = original.new_id();
    original.insert(table_id, 0, make_block(64));
    // Both handles point at the same underlying cache state.
    assert!(shared.get(table_id, 0).is_some());
  }
}
199}