spacetimedb_table/
blob_store.rs

1//! Provides the interface [`BlobStore`] that tables use to talk to
2//! a blob store engine for large var-len objects.
3//!
4//! These blob objects are referred to by their [`BlobHash`],
5//! which is currently defined through BLAKE3 on the bytes of the blob object.
6//!
7//! Two simple implementations are provided,
8//! primarily for tests and benchmarking.
9//! - [`NullBlobStore`], a blob store that always panics.
10//!   Used when ensuring that the blob store is unreachable in a scenario.
11//! - [`HashMapBlobStore`], a blob store backed by a `HashMap` that refcounts blob objects.
12//!   It is not optimized and is mainly intended for testing purposes.
13
14use blake3::hash;
15use spacetimedb_data_structures::map::{Entry, HashMap};
16use spacetimedb_lib::{de::Deserialize, ser::Serialize};
17
18use crate::MemoryUsage;
19
20/// The content address of a blob-stored object.
21#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash, Debug, Serialize, Deserialize)]
22pub struct BlobHash {
23    /// The hash of the blob-stored object.
24    ///
25    /// Uses BLAKE3 which fits in 32 bytes.
26    pub data: [u8; BlobHash::SIZE],
27}
28
29impl MemoryUsage for BlobHash {}
30
31impl BlobHash {
32    /// The size of the hash function's output in bytes.
33    pub const SIZE: usize = 32;
34
35    /// Returns the blob hash for `bytes`.
36    pub fn hash_from_bytes(bytes: &[u8]) -> Self {
37        let data = hash(bytes).into();
38        Self { data }
39    }
40}
41
42impl TryFrom<&[u8]> for BlobHash {
43    type Error = ();
44
45    fn try_from(data: &[u8]) -> Result<Self, Self::Error> {
46        let data: [u8; Self::SIZE] = data.try_into().map_err(drop)?;
47        Ok(Self { data })
48    }
49}
50
/// An error that signifies that a [`BlobHash`] wasn't associated with a large blob object.
///
/// Implements [`std::error::Error`] and [`std::fmt::Display`],
/// so it can be propagated with `?` into `Box<dyn Error>` and similar error types.
#[derive(Debug)]
pub struct NoSuchBlobError;

impl std::fmt::Display for NoSuchBlobError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("no blob object is associated with the given hash")
    }
}

// No source error to report, so the default method implementations suffice.
impl std::error::Error for NoSuchBlobError {}
54
55/// Iterator returned by [`BlobStore::iter_blobs`].
56///
57/// Each element is a tuple `(hash, uses, data)`,
58/// where `hash` is a blob's content-addressed [`BlobHash`],
59/// `uses` is the number of references to that blob,
60/// and `data` is the data itself.
61pub type BlobsIter<'a> = Box<dyn Iterator<Item = (&'a BlobHash, usize, &'a [u8])> + 'a>;
62
63/// The interface that tables use to talk to the blob store engine for large var-len objects.
64///
65/// These blob objects are referred to by their [`BlobHash`],
66/// which is currently defined through BLAKE3 on the bytes of the blob object.
67pub trait BlobStore: Sync {
68    /// Mark the `hash` as used.
69    ///
70    /// This is a more efficient way of doing:
71    /// ```ignore
72    /// let bytes = self.retrieve_blob(&hash);
73    /// let _ = self.insert_blob(&bytes);
74    /// ```
75    fn clone_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError>;
76
77    /// Insert `bytes` into the blob store.
78    ///
79    /// Returns the content address of `bytes` a `BlobHash`
80    /// which can be used in [`retrieve_blob`] to fetch it.
81    fn insert_blob(&mut self, bytes: &[u8]) -> BlobHash;
82
83    /// Insert `hash` referring to `bytes` and mark its refcount as `uses`.
84    ///
85    /// Used when restoring from a snapshot.
86    fn insert_with_uses(&mut self, hash: &BlobHash, uses: usize, bytes: Box<[u8]>);
87
88    /// Returns the bytes stored at the content address `hash`.
89    fn retrieve_blob(&self, hash: &BlobHash) -> Result<&[u8], NoSuchBlobError>;
90
91    /// Marks the `hash` as unused.
92    ///
93    /// Depending on the strategy employed by the blob store,
94    /// this might not actually free the data,
95    /// but rather just decrement a reference count.
96    fn free_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError>;
97
98    /// Iterate over all blobs present in the blob store.
99    ///
100    /// Each element is a tuple `(hash, uses, data)`,
101    /// where `hash` is a blob's content-addressed [`BlobHash`],
102    /// `uses` is the number of references to that blob,
103    /// and `data` is the data itself.
104    ///
105    /// Used when capturing a snapshot.
106    fn iter_blobs(&self) -> BlobsIter<'_>;
107
108    /// Returns the amount of memory in bytes used by blobs in this `BlobStore`.
109    ///
110    /// Duplicate blobs are counted a number of times equal to their refcount.
111    /// This is in order to preserve the property that inserting a large blob
112    /// causes this quantity to increase by that blob's size,
113    /// and deleting a large blob causes it to decrease the same amount.
114    fn bytes_used_by_blobs(&self) -> u64 {
115        self.iter_blobs()
116            .map(|(_, uses, data)| data.len() as u64 * uses as u64)
117            .sum()
118    }
119
120    /// Returns the number of blobs, or more precisely, blob-usages, recorded in this `BlobStore`.
121    ///
122    /// Duplicate blobs are counted a number of times equal to their refcount.
123    /// This is in order to preserve the property that inserting a large blob
124    /// causes this quantity to increase by 1, and deleting a large blob causes it to decrease by 1.
125    fn num_blobs(&self) -> u64 {
126        self.iter_blobs().map(|(_, uses, _)| uses as u64).sum()
127    }
128}
129
130/// A blob store that panics on all operations.
131/// Used for tests when you want to ensure that the blob store isn't used.
132#[derive(Default)]
133pub struct NullBlobStore;
134
135impl BlobStore for NullBlobStore {
136    fn clone_blob(&mut self, _hash: &BlobHash) -> Result<(), NoSuchBlobError> {
137        unimplemented!("NullBlobStore doesn't do anything")
138    }
139
140    fn insert_blob(&mut self, _bytes: &[u8]) -> BlobHash {
141        unimplemented!("NullBlobStore doesn't do anything")
142    }
143
144    fn insert_with_uses(&mut self, _hash: &BlobHash, _uses: usize, _bytes: Box<[u8]>) {
145        unimplemented!("NullBlobStore doesn't do anything")
146    }
147
148    fn retrieve_blob(&self, _hash: &BlobHash) -> Result<&[u8], NoSuchBlobError> {
149        unimplemented!("NullBlobStore doesn't do anything")
150    }
151
152    fn free_blob(&mut self, _hash: &BlobHash) -> Result<(), NoSuchBlobError> {
153        unimplemented!("NullBlobStore doesn't do anything")
154    }
155
156    fn iter_blobs(&self) -> BlobsIter<'_> {
157        unimplemented!("NullBlobStore doesn't do anything")
158    }
159}
160
161/// A blob store that is backed by a hash map with a reference counted value.
162/// Used for tests when you need an actual blob store.
163#[derive(Default, PartialEq, Eq, Debug)]
164pub struct HashMapBlobStore {
165    /// For testing, we use a hash map with a reference count
166    /// to handle freeing and cloning correctly.
167    map: HashMap<BlobHash, BlobObject>,
168}
169
170impl MemoryUsage for HashMapBlobStore {
171    fn heap_usage(&self) -> usize {
172        let Self { map } = self;
173        map.heap_usage()
174    }
175}
176
177/// A blob object including a reference count and the data.
178#[derive(PartialEq, Eq, Debug)]
179struct BlobObject {
180    /// Reference count of the blob.
181    uses: usize,
182    /// The blob data.
183    blob: Box<[u8]>,
184}
185
186impl MemoryUsage for BlobObject {
187    fn heap_usage(&self) -> usize {
188        let Self { uses, blob } = self;
189        uses.heap_usage() + blob.heap_usage()
190    }
191}
192
193impl BlobStore for HashMapBlobStore {
194    fn clone_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError> {
195        self.map.get_mut(hash).ok_or(NoSuchBlobError)?.uses += 1;
196        Ok(())
197    }
198
199    fn insert_blob(&mut self, bytes: &[u8]) -> BlobHash {
200        let hash = BlobHash::hash_from_bytes(bytes);
201        self.map
202            .entry(hash)
203            .and_modify(|v| v.uses += 1)
204            .or_insert_with(|| BlobObject {
205                blob: bytes.into(),
206                uses: 1,
207            });
208        hash
209    }
210
211    fn insert_with_uses(&mut self, hash: &BlobHash, uses: usize, bytes: Box<[u8]>) {
212        debug_assert_eq!(hash, &BlobHash::hash_from_bytes(&bytes));
213        self.map
214            .entry(*hash)
215            .and_modify(|v| v.uses += uses)
216            .or_insert_with(|| BlobObject { blob: bytes, uses });
217    }
218
219    fn retrieve_blob(&self, hash: &BlobHash) -> Result<&[u8], NoSuchBlobError> {
220        self.map.get(hash).map(|obj| &*obj.blob).ok_or(NoSuchBlobError)
221    }
222
223    fn free_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError> {
224        match self.map.entry(*hash) {
225            Entry::Vacant(_) => return Err(NoSuchBlobError),
226            Entry::Occupied(entry) if entry.get().uses == 1 => drop(entry.remove()),
227            Entry::Occupied(mut entry) => entry.get_mut().uses -= 1,
228        }
229        Ok(())
230    }
231
232    fn iter_blobs(&self) -> BlobsIter<'_> {
233        Box::new(self.map.iter().map(|(hash, obj)| (hash, obj.uses, &obj.blob[..])))
234    }
235}
236
#[cfg(test)]
impl HashMapBlobStore {
    /// Yields a `(hash, usage count, blob bytes)` triple for every stored blob.
    fn iter(&self) -> impl Iterator<Item = (&BlobHash, usize, &[u8])> + '_ {
        self.map
            .iter()
            .map(|(hash, BlobObject { uses, blob })| (hash, *uses, &blob[..]))
    }

    /// Builds a map from the hash of each stored blob to its usage count.
    pub fn usage_counter(&self) -> HashMap<BlobHash, usize> {
        let counts = self.iter().map(|(&hash, uses, _blob)| (hash, uses));
        counts.collect()
    }
}