mini_lsm/table/
builder.rs

1use std::path::Path;
2use std::sync::Arc;
3
4use anyhow::Result;
5use bytes::BufMut;
6
7use super::bloom::Bloom;
8use super::{BlockMeta, FileObject, SsTable};
9use crate::block::BlockBuilder;
10use crate::key::{KeySlice, KeyVec};
11use crate::lsm_storage::BlockCache;
12
13/// Builds an SSTable from key-value pairs.
14pub struct SsTableBuilder {
15    builder: BlockBuilder,
16    first_key: KeyVec,
17    last_key: KeyVec,
18    data: Vec<u8>,
19    pub(crate) meta: Vec<BlockMeta>,
20    block_size: usize,
21    key_hashes: Vec<u32>,
22}
23
24impl SsTableBuilder {
25    /// Create a builder based on target block size.
26    pub fn new(block_size: usize) -> Self {
27        Self {
28            data: Vec::new(),
29            meta: Vec::new(),
30            first_key: KeyVec::new(),
31            last_key: KeyVec::new(),
32            block_size,
33            builder: BlockBuilder::new(block_size),
34            key_hashes: Vec::new(),
35        }
36    }
37
38    /// Adds a key-value pair to SSTable
39    pub fn add(&mut self, key: KeySlice, value: &[u8]) {
40        if self.first_key.is_empty() {
41            self.first_key.set_from_slice(key);
42        }
43
44        self.key_hashes.push(farmhash::fingerprint32(key.raw_ref()));
45
46        if self.builder.add(key, value) {
47            self.last_key.set_from_slice(key);
48            return;
49        }
50
51        // create a new block builder and append block data
52        self.finish_block();
53
54        // add the key-value pair to the next block
55        assert!(self.builder.add(key, value));
56        self.first_key.set_from_slice(key);
57        self.last_key.set_from_slice(key);
58    }
59
60    /// Get the estimated size of the SSTable.
61    pub fn estimated_size(&self) -> usize {
62        self.data.len()
63    }
64
65    fn finish_block(&mut self) {
66        let builder = std::mem::replace(&mut self.builder, BlockBuilder::new(self.block_size));
67        let encoded_block = builder.build().encode();
68        self.meta.push(BlockMeta {
69            offset: self.data.len(),
70            first_key: std::mem::take(&mut self.first_key).into_key_bytes(),
71            last_key: std::mem::take(&mut self.last_key).into_key_bytes(),
72        });
73        let checksum = crc32fast::hash(&encoded_block);
74        self.data.extend(encoded_block);
75        self.data.put_u32(checksum);
76    }
77
78    /// Builds the SSTable and writes it to the given path. Use the `FileObject` structure to manipulate the disk objects.
79    pub fn build(
80        mut self,
81        id: usize,
82        block_cache: Option<Arc<BlockCache>>,
83        path: impl AsRef<Path>,
84    ) -> Result<SsTable> {
85        self.finish_block();
86        let mut buf = self.data;
87        let meta_offset = buf.len();
88        BlockMeta::encode_block_meta(&self.meta, &mut buf);
89        buf.put_u32(meta_offset as u32);
90        let bloom = Bloom::build_from_key_hashes(
91            &self.key_hashes,
92            Bloom::bloom_bits_per_key(self.key_hashes.len(), 0.01),
93        );
94        let bloom_offset = buf.len();
95        bloom.encode(&mut buf);
96        buf.put_u32(bloom_offset as u32);
97        let file = FileObject::create(path.as_ref(), buf)?;
98        Ok(SsTable {
99            id,
100            file,
101            first_key: self.meta.first().unwrap().first_key.clone(),
102            last_key: self.meta.last().unwrap().last_key.clone(),
103            block_meta: self.meta,
104            block_meta_offset: meta_offset,
105            block_cache,
106            bloom: Some(bloom),
107            max_ts: 0, // will be changed to latest ts in week 2
108        })
109    }
110
111    #[cfg(test)]
112    pub(crate) fn build_for_test(self, path: impl AsRef<Path>) -> Result<SsTable> {
113        self.build(0, None, path)
114    }
115}