// lsm_tree/block_cache.rs
// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)
use crate::either::Either::{self, Left, Right};
use crate::segment::id::GlobalSegmentId;
use crate::segment::{block_index::IndexBlock, value_block::ValueBlock};
use quick_cache::Weighter;
use quick_cache::{sync::Cache, Equivalent};
use std::sync::Arc;
/// A cached block: either a value block (`Left`) or an index block (`Right`).
type Item = Either<Arc<ValueBlock>, Arc<IndexBlock>>;
// Cache key: (Segment ID, Block offset)
//
// The block type is not part of the key: value blocks and index blocks
// are both inserted under a key built from the segment ID and offset only.
#[derive(Eq, std::hash::Hash, PartialEq)]
struct CacheKey(GlobalSegmentId, u64);
impl Equivalent<CacheKey> for (GlobalSegmentId, u64) {
fn equivalent(&self, key: &CacheKey) -> bool {
self.0 == key.0 && self.1 == key.1
}
}
// Builds a cache key from a `(segment ID, block offset)` pair.
impl From<(GlobalSegmentId, u64)> for CacheKey {
    fn from(pair: (GlobalSegmentId, u64)) -> Self {
        let (segment_id, offset) = pair;
        Self(segment_id, offset)
    }
}
/// Weighter handed to the cache; its `Weighter` impl charges every cached
/// block the uncompressed length recorded in the block's header.
#[derive(Clone)]
struct BlockWeighter;
impl Weighter<CacheKey, Item> for BlockWeighter {
    /// Returns the weight of a cached block: the uncompressed length stored
    /// in the block's header.
    ///
    /// The key is ignored; only the block itself determines the weight.
    fn weight(&self, _: &CacheKey, block: &Item) -> u64 {
        // The lengths are converted with `into()`, not an `as` cast, so there
        // is no truncation lint to silence here.
        match block {
            Either::Left(block) => block.header.uncompressed_length.into(),
            Either::Right(block) => block.header.uncompressed_length.into(),
        }
    }
}
/// Block cache, in which blocks are cached in-memory
/// after being retrieved from disk
///
/// This speeds up consecutive queries to nearby data, improving
/// read performance for hot data.
///
/// # Examples
///
/// Sharing block cache between multiple trees
///
/// ```
/// # use lsm_tree::{Tree, Config, BlockCache};
/// # use std::sync::Arc;
/// #
/// // Provide 40 MB of cache capacity
/// let block_cache = Arc::new(BlockCache::with_capacity_bytes(40 * 1_000 * 1_000));
///
/// # let folder = tempfile::tempdir()?;
/// let tree1 = Config::new(folder).block_cache(block_cache.clone()).open()?;
/// # let folder = tempfile::tempdir()?;
/// let tree2 = Config::new(folder).block_cache(block_cache.clone()).open()?;
/// #
/// # Ok::<(), lsm_tree::Error>(())
/// ```
pub struct BlockCache {
    // Underlying cache: keyed by (segment ID, block offset), weighted by
    // `BlockWeighter`, hashed with xxh3
    data: Cache<CacheKey, Item, BlockWeighter, xxhash_rust::xxh3::Xxh3Builder>,
    // Capacity in bytes, as passed to `with_capacity_bytes`
    capacity: u64,
}
impl BlockCache {
/// Creates a new block cache with roughly `n` bytes of capacity.
#[must_use]
pub fn with_capacity_bytes(bytes: u64) -> Self {
use quick_cache::sync::DefaultLifecycle;
Self {
data: Cache::with(
1_000_000,
bytes,
BlockWeighter,
xxhash_rust::xxh3::Xxh3Builder::new(),
DefaultLifecycle::default(),
),
capacity: bytes,
}
}
/// Returns the amount of cached bytes.
#[must_use]
pub fn size(&self) -> u64 {
self.data.weight()
}
/// Returns the cache capacity in bytes.
#[must_use]
pub fn capacity(&self) -> u64 {
self.capacity
}
/// Returns the number of cached blocks.
#[must_use]
pub fn len(&self) -> usize {
self.data.len()
}
/// Returns `true` if there are no cached blocks.
#[must_use]
pub fn is_empty(&self) -> bool {
self.data.is_empty()
}
#[doc(hidden)]
pub fn insert_disk_block(
&self,
segment_id: GlobalSegmentId,
offset: u64,
value: Arc<ValueBlock>,
) {
if self.capacity > 0 {
self.data.insert((segment_id, offset).into(), Left(value));
}
}
#[doc(hidden)]
pub fn insert_index_block(
&self,
segment_id: GlobalSegmentId,
offset: u64,
value: Arc<IndexBlock>,
) {
if self.capacity > 0 {
self.data.insert((segment_id, offset).into(), Right(value));
}
}
#[doc(hidden)]
#[must_use]
pub fn get_disk_block(
&self,
segment_id: GlobalSegmentId,
offset: u64,
) -> Option<Arc<ValueBlock>> {
let key = (segment_id, offset);
let item = self.data.get(&key)?;
Some(item.left())
}
#[doc(hidden)]
#[must_use]
pub fn get_index_block(
&self,
segment_id: GlobalSegmentId,
offset: u64,
) -> Option<Arc<IndexBlock>> {
let key = (segment_id, offset);
let item = self.data.get(&key)?;
Some(item.right())
}
}