spacetimedb_table/
blob_store.rs

1//! Provides the interface [`BlobStore`] that tables use to talk to
2//! a blob store engine for large var-len objects.
3//!
4//! These blob objects are referred to by their [`BlobHash`],
5//! which is currently defined through BLAKE3 on the bytes of the blob object.
6//!
7//! Two simple implementations are provided,
8//! primarily for tests and benchmarking.
9//! - [`NullBlobStore`], a blob store that always panics.
10//!   Used when ensuring that the blob store is unreachable in a scenario.
//! - [`HashMapBlobStore`], a blob store backed by a `HashMap` that refcounts blob objects.
//!   It is not optimized and is mainly intended for testing purposes.
13
14use blake3::hash;
15use spacetimedb_data_structures::map::{Entry, HashMap};
16use spacetimedb_lib::{de::Deserialize, ser::Serialize};
17use spacetimedb_memory_usage::MemoryUsage;
18
/// The content address of a blob-stored object.
///
/// This is a plain wrapper around the raw hash bytes.
/// It is `Copy` and derives `Eq`, `Ord` and `Hash`,
/// so it can be passed around by value and used directly as a map key.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash, Debug, Serialize, Deserialize)]
pub struct BlobHash {
    /// The hash of the blob-stored object.
    ///
    /// Uses BLAKE3 which fits in 32 bytes.
    pub data: [u8; BlobHash::SIZE],
}
27
// A `BlobHash` keeps its bytes inline in a fixed-size array and owns no heap data,
// so the trait's default method implementations are used as-is.
impl MemoryUsage for BlobHash {}
29
30impl BlobHash {
31    /// The size of the hash function's output in bytes.
32    pub const SIZE: usize = 32;
33
34    /// Returns the blob hash for `bytes`.
35    pub fn hash_from_bytes(bytes: &[u8]) -> Self {
36        let data = hash(bytes).into();
37        Self { data }
38    }
39}
40
41impl TryFrom<&[u8]> for BlobHash {
42    type Error = ();
43
44    fn try_from(data: &[u8]) -> Result<Self, Self::Error> {
45        let data: [u8; Self::SIZE] = data.try_into().map_err(drop)?;
46        Ok(Self { data })
47    }
48}
49
/// An error that signifies that a [`BlobHash`] wasn't associated with a large blob object.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`],
/// so it can be propagated with `?` into `Box<dyn Error>` and similar error containers.
#[derive(Debug)]
pub struct NoSuchBlobError;

impl std::fmt::Display for NoSuchBlobError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("no blob object is associated with the given blob hash")
    }
}

// The error has no underlying cause, so the default `source()` of `None` is correct.
impl std::error::Error for NoSuchBlobError {}
53
/// Iterator returned by [`BlobStore::iter_blobs`].
///
/// Each element is a tuple `(hash, uses, data)`,
/// where `hash` is a blob's content-addressed [`BlobHash`],
/// `uses` is the number of references to that blob,
/// and `data` is the data itself.
///
/// The iterator is boxed as a trait object,
/// so each [`BlobStore`] implementation can return its own concrete iterator type.
/// Both `hash` and `data` are borrowed for the lifetime `'a`.
pub type BlobsIter<'a> = Box<dyn Iterator<Item = (&'a BlobHash, usize, &'a [u8])> + 'a>;
61
62/// The interface that tables use to talk to the blob store engine for large var-len objects.
63///
64/// These blob objects are referred to by their [`BlobHash`],
65/// which is currently defined through BLAKE3 on the bytes of the blob object.
66pub trait BlobStore: Sync {
67    /// Mark the `hash` as used.
68    ///
69    /// This is a more efficient way of doing:
70    /// ```ignore
71    /// let bytes = self.retrieve_blob(&hash);
72    /// let _ = self.insert_blob(&bytes);
73    /// ```
74    fn clone_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError>;
75
76    /// Insert `bytes` into the blob store.
77    ///
78    /// Returns the content address of `bytes` a `BlobHash`
79    /// which can be used in [`retrieve_blob`] to fetch it.
80    fn insert_blob(&mut self, bytes: &[u8]) -> BlobHash;
81
82    /// Insert `hash` referring to `bytes` and mark its refcount as `uses`.
83    ///
84    /// Used when restoring from a snapshot.
85    fn insert_with_uses(&mut self, hash: &BlobHash, uses: usize, bytes: Box<[u8]>);
86
87    /// Returns the bytes stored at the content address `hash`.
88    fn retrieve_blob(&self, hash: &BlobHash) -> Result<&[u8], NoSuchBlobError>;
89
90    /// Marks the `hash` as unused.
91    ///
92    /// Depending on the strategy employed by the blob store,
93    /// this might not actually free the data,
94    /// but rather just decrement a reference count.
95    fn free_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError>;
96
97    /// Iterate over all blobs present in the blob store.
98    ///
99    /// Each element is a tuple `(hash, uses, data)`,
100    /// where `hash` is a blob's content-addressed [`BlobHash`],
101    /// `uses` is the number of references to that blob,
102    /// and `data` is the data itself.
103    ///
104    /// Used when capturing a snapshot.
105    fn iter_blobs(&self) -> BlobsIter<'_>;
106
107    /// Returns the amount of memory in bytes used by blobs in this `BlobStore`.
108    ///
109    /// Duplicate blobs are counted a number of times equal to their refcount.
110    /// This is in order to preserve the property that inserting a large blob
111    /// causes this quantity to increase by that blob's size,
112    /// and deleting a large blob causes it to decrease the same amount.
113    fn bytes_used_by_blobs(&self) -> u64 {
114        self.iter_blobs()
115            .map(|(_, uses, data)| data.len() as u64 * uses as u64)
116            .sum()
117    }
118
119    /// Returns the number of blobs, or more precisely, blob-usages, recorded in this `BlobStore`.
120    ///
121    /// Duplicate blobs are counted a number of times equal to their refcount.
122    /// This is in order to preserve the property that inserting a large blob
123    /// causes this quantity to increase by 1, and deleting a large blob causes it to decrease by 1.
124    fn num_blobs(&self) -> u64 {
125        self.iter_blobs().map(|(_, uses, _)| uses as u64).sum()
126    }
127}
128
/// A blob store that panics on all operations.
/// Used for tests when you want to ensure that the blob store isn't used.
///
/// This is a unit struct carrying no state.
#[derive(Default)]
pub struct NullBlobStore;
133
134impl BlobStore for NullBlobStore {
135    fn clone_blob(&mut self, _hash: &BlobHash) -> Result<(), NoSuchBlobError> {
136        unimplemented!("NullBlobStore doesn't do anything")
137    }
138
139    fn insert_blob(&mut self, _bytes: &[u8]) -> BlobHash {
140        unimplemented!("NullBlobStore doesn't do anything")
141    }
142
143    fn insert_with_uses(&mut self, _hash: &BlobHash, _uses: usize, _bytes: Box<[u8]>) {
144        unimplemented!("NullBlobStore doesn't do anything")
145    }
146
147    fn retrieve_blob(&self, _hash: &BlobHash) -> Result<&[u8], NoSuchBlobError> {
148        unimplemented!("NullBlobStore doesn't do anything")
149    }
150
151    fn free_blob(&mut self, _hash: &BlobHash) -> Result<(), NoSuchBlobError> {
152        unimplemented!("NullBlobStore doesn't do anything")
153    }
154
155    fn iter_blobs(&self) -> BlobsIter<'_> {
156        unimplemented!("NullBlobStore doesn't do anything")
157    }
158}
159
/// A blob store that is backed by a hash map with a reference counted value.
/// Used for tests when you need an actual blob store.
///
/// Equality is derived, so two stores are compared through their underlying maps,
/// i.e., through the stored hashes, reference counts and blob bytes.
#[derive(Default, PartialEq, Eq, Debug)]
pub struct HashMapBlobStore {
    /// For testing, we use a hash map with a reference count
    /// to handle freeing and cloning correctly.
    ///
    /// Keys are the content addresses of the blobs;
    /// each value holds the blob's bytes together with its reference count.
    map: HashMap<BlobHash, BlobObject>,
}
168
169impl MemoryUsage for HashMapBlobStore {
170    fn heap_usage(&self) -> usize {
171        let Self { map } = self;
172        map.heap_usage()
173    }
174}
175
/// A blob object including a reference count and the data.
///
/// This is the value type stored per [`BlobHash`] in [`HashMapBlobStore`].
#[derive(PartialEq, Eq, Debug)]
struct BlobObject {
    /// Reference count of the blob, i.e., how many uses of it are currently recorded.
    uses: usize,
    /// The blob data.
    blob: Box<[u8]>,
}
184
185impl MemoryUsage for BlobObject {
186    fn heap_usage(&self) -> usize {
187        let Self { uses, blob } = self;
188        uses.heap_usage() + blob.heap_usage()
189    }
190}
191
192impl BlobStore for HashMapBlobStore {
193    fn clone_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError> {
194        self.map.get_mut(hash).ok_or(NoSuchBlobError)?.uses += 1;
195        Ok(())
196    }
197
198    fn insert_blob(&mut self, bytes: &[u8]) -> BlobHash {
199        let hash = BlobHash::hash_from_bytes(bytes);
200        self.map
201            .entry(hash)
202            .and_modify(|v| v.uses += 1)
203            .or_insert_with(|| BlobObject {
204                blob: bytes.into(),
205                uses: 1,
206            });
207        hash
208    }
209
210    fn insert_with_uses(&mut self, hash: &BlobHash, uses: usize, bytes: Box<[u8]>) {
211        debug_assert_eq!(hash, &BlobHash::hash_from_bytes(&bytes));
212        self.map
213            .entry(*hash)
214            .and_modify(|v| v.uses += uses)
215            .or_insert_with(|| BlobObject { blob: bytes, uses });
216    }
217
218    fn retrieve_blob(&self, hash: &BlobHash) -> Result<&[u8], NoSuchBlobError> {
219        self.map.get(hash).map(|obj| &*obj.blob).ok_or(NoSuchBlobError)
220    }
221
222    fn free_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError> {
223        match self.map.entry(*hash) {
224            Entry::Vacant(_) => return Err(NoSuchBlobError),
225            Entry::Occupied(entry) if entry.get().uses == 1 => drop(entry.remove()),
226            Entry::Occupied(mut entry) => entry.get_mut().uses -= 1,
227        }
228        Ok(())
229    }
230
231    fn iter_blobs(&self) -> BlobsIter<'_> {
232        Box::new(self.map.iter().map(|(hash, obj)| (hash, obj.uses, &obj.blob[..])))
233    }
234}
235
#[cfg(test)]
impl HashMapBlobStore {
    /// Yields a `(hash, usage count, blob bytes)` triple for every stored blob.
    fn iter(&self) -> impl Iterator<Item = (&BlobHash, usize, &[u8])> + '_ {
        self.map
            .iter()
            .map(|(hash, object)| (hash, object.uses, &object.blob[..]))
    }

    /// Builds a map from each stored blob hash to its usage count in this blob store.
    pub fn usage_counter(&self) -> HashMap<BlobHash, usize> {
        let mut counts = HashMap::default();
        counts.extend(self.iter().map(|(hash, uses, _)| (*hash, uses)));
        counts
    }
}