mini_lsm/table/
builder.rs1use std::path::Path;
2use std::sync::Arc;
3
4use anyhow::Result;
5use bytes::BufMut;
6
7use super::bloom::Bloom;
8use super::{BlockMeta, FileObject, SsTable};
9use crate::block::BlockBuilder;
10use crate::key::{KeySlice, KeyVec};
11use crate::lsm_storage::BlockCache;
12
13pub struct SsTableBuilder {
15 builder: BlockBuilder,
16 first_key: KeyVec,
17 last_key: KeyVec,
18 data: Vec<u8>,
19 pub(crate) meta: Vec<BlockMeta>,
20 block_size: usize,
21 key_hashes: Vec<u32>,
22}
23
24impl SsTableBuilder {
25 pub fn new(block_size: usize) -> Self {
27 Self {
28 data: Vec::new(),
29 meta: Vec::new(),
30 first_key: KeyVec::new(),
31 last_key: KeyVec::new(),
32 block_size,
33 builder: BlockBuilder::new(block_size),
34 key_hashes: Vec::new(),
35 }
36 }
37
38 pub fn add(&mut self, key: KeySlice, value: &[u8]) {
40 if self.first_key.is_empty() {
41 self.first_key.set_from_slice(key);
42 }
43
44 self.key_hashes.push(farmhash::fingerprint32(key.raw_ref()));
45
46 if self.builder.add(key, value) {
47 self.last_key.set_from_slice(key);
48 return;
49 }
50
51 self.finish_block();
53
54 assert!(self.builder.add(key, value));
56 self.first_key.set_from_slice(key);
57 self.last_key.set_from_slice(key);
58 }
59
60 pub fn estimated_size(&self) -> usize {
62 self.data.len()
63 }
64
65 fn finish_block(&mut self) {
66 let builder = std::mem::replace(&mut self.builder, BlockBuilder::new(self.block_size));
67 let encoded_block = builder.build().encode();
68 self.meta.push(BlockMeta {
69 offset: self.data.len(),
70 first_key: std::mem::take(&mut self.first_key).into_key_bytes(),
71 last_key: std::mem::take(&mut self.last_key).into_key_bytes(),
72 });
73 let checksum = crc32fast::hash(&encoded_block);
74 self.data.extend(encoded_block);
75 self.data.put_u32(checksum);
76 }
77
78 pub fn build(
80 mut self,
81 id: usize,
82 block_cache: Option<Arc<BlockCache>>,
83 path: impl AsRef<Path>,
84 ) -> Result<SsTable> {
85 self.finish_block();
86 let mut buf = self.data;
87 let meta_offset = buf.len();
88 BlockMeta::encode_block_meta(&self.meta, &mut buf);
89 buf.put_u32(meta_offset as u32);
90 let bloom = Bloom::build_from_key_hashes(
91 &self.key_hashes,
92 Bloom::bloom_bits_per_key(self.key_hashes.len(), 0.01),
93 );
94 let bloom_offset = buf.len();
95 bloom.encode(&mut buf);
96 buf.put_u32(bloom_offset as u32);
97 let file = FileObject::create(path.as_ref(), buf)?;
98 Ok(SsTable {
99 id,
100 file,
101 first_key: self.meta.first().unwrap().first_key.clone(),
102 last_key: self.meta.last().unwrap().last_key.clone(),
103 block_meta: self.meta,
104 block_meta_offset: meta_offset,
105 block_cache,
106 bloom: Some(bloom),
107 max_ts: 0, })
109 }
110
111 #[cfg(test)]
112 pub(crate) fn build_for_test(self, path: impl AsRef<Path>) -> Result<SsTable> {
113 self.build(0, None, path)
114 }
115}