// simd_r_drive/storage_engine/traits/reader.rs

use simd_r_drive_entry_handle::EntryMetadata;
use std::io::Result;

4pub trait DataStoreReader {
5    type EntryHandleType;
6
7    /// Checks whether a key currently exists in the store.
8    ///
9    /// This is a **constant‑time** lookup against the in‑memory
10    /// [`crate::storage_engine::KeyIndexer`] map.  
11    /// A key is considered to *exist* only if it is present **and not marked
12    /// as deleted**.
13    ///
14    /// # Parameters
15    /// - `key`: The **binary key** to check.
16    ///
17    /// # Returns
18    /// - `Ok(true)`: Key exists and is active.  
19    /// - `Ok(false)`: Key is absent or has been deleted.  
20    /// - `Err(std::io::Error)`: On I/O failure.
21    fn exists(&self, key: &[u8]) -> Result<bool>;
22
23    /// Checks whether a key with a pre-computed hash exists in the store.
24    ///
25    /// This is a more direct version of [`Self::exists`] that skips the hashing step,
26    /// making it faster if the hash is already known. Because the original key is not
27    /// provided, this check does not perform tag verification and relies solely on the
28    /// hash's presence in the index.
29    ///
30    /// # Parameters
31    /// - `prehashed_key`: The **pre-computed hash** of the key to check.
32    ///
33    /// # Returns
34    /// - `Ok(true)` if the key hash exists in the index.
35    /// - `Ok(false)` if the key hash is absent.
36    /// - `Err(std::io::Error)`: On I/O failure.
37    fn exists_with_key_hash(&self, prehashed_key: u64) -> Result<bool>;
38
39    /// Retrieves the most recent value associated with a given key.
40    ///
41    /// This method **efficiently looks up a key** using a fast in-memory index,
42    /// and returns the latest corresponding value if found.
43    ///
44    /// # Parameters:
45    /// - `key`: The **binary key** whose latest value is to be retrieved.
46    ///
47    /// # Returns:
48    /// - `Ok(Some(EntryHandle))`: Handle to the entry if found.
49    /// - `Ok(None)`: If the key does not exist or is deleted.
50    /// - `Err(std::io::Error)`: On I/O failure.
51    ///
52    /// # Notes:
53    /// - The returned `EntryHandle` provides zero-copy access to the stored data.
54    fn read(&self, key: &[u8]) -> Result<Option<Self::EntryHandleType>>;
55
56    /// Retrieves the most recent value associated with a pre-computed key hash.
57    ///
58    /// This is a low-level alternative to [`Self::read`] that looks up an entry using
59    /// only its hash, bypassing the hashing step.
60    ///
61    /// # Warning
62    /// This method does **not** perform tag verification, as the original key is not
63    /// provided. This means that in the rare event of a hash collision, this function
64    /// could return the entry for a different key.
65    ///
66    /// # Parameters
67    /// - `prehashed_key`: The **pre-computed hash** of the key to retrieve.
68    ///
69    /// # Returns
70    /// - `Ok(Some(EntryHandle))`: Handle to the entry if found.
71    /// - `Ok(None)`: If the key hash does not exist or is deleted.
72    /// - `Err(std::io::Error)`: On I/O failure.
73    fn read_with_key_hash(&self, prehashed_key: u64) -> Result<Option<Self::EntryHandleType>>;
74
75    /// Retrieves the last entry written to the file.
76    ///
77    /// # Returns:
78    /// - `Ok(Some(EntryHandle))`: Handle to the last entry, if any.
79    /// - `Ok(None)`: If the file is empty.
80    /// - `Err(std::io::Error)`: On I/O failure.
81    fn read_last_entry(&self) -> Result<Option<Self::EntryHandleType>>;
82
83    /// Reads many keys in one shot.
84    ///
85    /// This is the **vectorized** counterpart to [`crate::DataStore::read`].  
86    /// It takes a slice of raw-byte keys and returns a `Vec` whose *i-th* element
87    /// is the result of looking up the *i-th* key.
88    ///
89    /// *   **Zero-copy** – each `Some(EntryHandle)` points directly into the
90    ///     shared `Arc<Mmap>`; no payload is copied.
91    /// *   **Constant-time per key** – the in-memory [`crate::storage_engine::KeyIndexer`] map is used
92    ///     for each lookup, so the complexity is *O(n)* where *n* is
93    ///     `keys.len()`.
94    /// *   **Thread-safe** – a read lock on the index is taken once for the whole
95    ///     batch, so concurrent writers are still blocked only for the same short
96    ///     critical section that a single `read` would need.
97    ///
98    /// # Returns:
99    /// - `Ok(results)`: `Vec<Option<EntryHandle>>` in key order.
100    /// - `Err(std::io::Error)`: On I/O failure.
101    fn batch_read(&self, keys: &[&[u8]]) -> Result<Vec<Option<Self::EntryHandleType>>>;
102
103    /// Reads many keys in one shot using pre-computed hashes.
104    ///
105    /// This is a lower-level, high-performance version of [`Self::batch_read`].
106    /// It is designed for scenarios where the caller has already computed the key
107    /// hashes and wants to avoid the overhead of re-hashing. The method offers
108    /// an optional verification step to safeguard against hash collisions.
109    ///
110    /// * **Zero-copy**: Each `Some(EntryHandle)` provides a direct, zero-copy view
111    ///   into the memory-mapped file.
112    /// * **High-performance**: Bypasses the key hashing step if hashes are already
113    ///   available.
114    /// * **Thread-safe**: Acquires a single read lock for the entire batch
115    ///   operation, minimizing contention.
116    ///
117    /// # Parameters
118    /// - `prehashed_keys`: A slice of `u64` key hashes to look up.
119    /// - `non_hashed_keys`: An optional slice of the original, non-hashed keys
120    ///   corresponding to `prehashed_keys`.
121    ///     - If `Some(keys)`, the method performs a tag-based verification to ensure
122    ///       that the found entry truly belongs to the original key, preventing
123    ///       data retrieval from a hash collision. The length of this slice
124    ///       **must** match the length of `prehashed_keys`.
125    ///     - If `None`, this verification is skipped. The lookup relies solely
126    ///       on the hash, which is faster but carries a theoretical risk of
127    ///       returning incorrect data in the event of a hash collision.
128    ///
129    /// # Returns
130    /// - `Ok(results)`: A `Vec<Option<Self::EntryHandleType>>` where each element
131    ///   corresponds to the result of looking up the key at the same index.
132    /// - `Err(std::io::Error)`: On I/O failure or if the lengths of `prehashed_keys`
133    ///   and `non_hashed_keys` (when `Some`) do not match.
134    fn batch_read_hashed_keys(
135        &self,
136        prehashed_keys: &[u64],
137        non_hashed_keys: Option<&[&[u8]]>,
138    ) -> Result<Vec<Option<Self::EntryHandleType>>>;
139
140    /// Retrieves metadata for a given key.
141    ///
142    /// This method looks up a key in the storage and returns its associated metadata.
143    ///
144    /// # Parameters:
145    /// - `key`: The **binary key** whose metadata is to be retrieved.
146    ///
147    /// # Returns:
148    /// - `Ok(Some(metadata))`: Metadata if the key exists.
149    /// - `Ok(None)`: If the key is absent.
150    /// - `Err(std::io::Error)`: On I/O failure.
151    fn read_metadata(&self, key: &[u8]) -> Result<Option<EntryMetadata>>;
152
153    /// Counts **active** (non-deleted) key-value pairs in the storage.
154    ///
155    /// # Returns:
156    /// - `Ok(active_count)`: Total active entries.
157    /// - `Err(std::io::Error)`: On I/O failure.
158    fn len(&self) -> Result<usize>;
159
160    /// Determines if the store is empty or has no active keys.
161    ///
162    /// # Returns:
163    /// - `Ok(bool)`: Whether or not the store has any active keys.
164    /// - `Err(std::io::Error)`: On I/O failure.
165    fn is_empty(&self) -> Result<bool>;
166
167    /// Returns the current file size on disk (including those of deleted entries).
168    ///
169    /// # Returns:
170    /// - `Ok(bytes)`: File size in bytes.
171    /// - `Err(std::io::Error)`: On I/O failure.
172    fn file_size(&self) -> Result<u64>;
173}
174
175#[async_trait::async_trait]
176pub trait AsyncDataStoreReader {
177    type EntryHandleType;
178
179    /// Checks whether a key currently exists in the store.
180    ///
181    /// This is a **constant‑time** lookup against the in‑memory
182    /// [`crate::storage_engine::KeyIndexer`] map.  
183    /// A key is considered to *exist* only if it is present **and not marked
184    /// as deleted**.
185    ///
186    /// # Parameters
187    /// - `key`: The **binary key** to check.
188    ///
189    /// # Returns
190    /// - `Ok(true)`: Key exists and is active.  
191    /// - `Ok(false)`: Key is absent or has been deleted.  
192    /// - `Err(std::io::Error)`: On I/O failure.
193    async fn exists(&self, key: &[u8]) -> Result<bool>;
194
195    /// Checks whether a key with a pre-computed hash exists in the store.
196    ///
197    /// This is a more direct version of [`Self::exists`] that skips the hashing step,
198    /// making it faster if the hash is already known. Because the original key is not
199    /// provided, this check does not perform tag verification and relies solely on the
200    /// hash's presence in the index.
201    ///
202    /// # Parameters
203    /// - `prehashed_key`: The **pre-computed hash** of the key to check.
204    ///
205    /// # Returns
206    /// - `Ok(true)` if the key hash exists in the index.
207    /// - `Ok(false)` if the key hash is absent.
208    /// - `Err(std::io::Error)`: On I/O failure.
209    async fn exists_with_key_hash(&self, prehashed_key: u64) -> Result<bool>;
210
211    /// Retrieves the most recent value associated with a given key.
212    ///
213    /// This method **efficiently looks up a key** using a fast in-memory index,
214    /// and returns the latest corresponding value if found.
215    ///
216    /// # Parameters:
217    /// - `key`: The **binary key** whose latest value is to be retrieved.
218    ///
219    /// # Returns:
220    /// - `Ok(Some(EntryHandle))`: Handle to the entry if found.
221    /// - `Ok(None)`: If the key does not exist or is deleted.
222    /// - `Err(std::io::Error)`: On I/O failure.
223    ///
224    /// # Notes:
225    /// - The returned `EntryHandle` provides zero-copy access to the stored data.
226    async fn read(&self, key: &[u8]) -> Result<Option<Self::EntryHandleType>>;
227
228    /// Retrieves the most recent value associated with a pre-computed key hash.
229    ///
230    /// This is a low-level alternative to [`Self::read`] that looks up an entry using
231    /// only its hash, bypassing the hashing step.
232    ///
233    /// # Warning
234    /// This method does **not** perform tag verification, as the original key is not
235    /// provided. This means that in the rare event of a hash collision, this function
236    /// could return the entry for a different key.
237    ///
238    /// # Parameters
239    /// - `prehashed_key`: The **pre-computed hash** of the key to retrieve.
240    ///
241    /// # Returns
242    /// - `Ok(Some(EntryHandle))`: Handle to the entry if found.
243    /// - `Ok(None)`: If the key hash does not exist or is deleted.
244    /// - `Err(std::io::Error)`: On I/O failure.
245    async fn read_with_key_hash(&self, prehashed_key: u64)
246    -> Result<Option<Self::EntryHandleType>>;
247
248    /// Retrieves the last entry written to the file.
249    ///
250    /// # Returns:
251    /// - `Ok(Some(EntryHandle))`: Handle to the last entry, if any.
252    /// - `Ok(None)`: If the file is empty.
253    /// - `Err(std::io::Error)`: On I/O failure.
254    async fn read_last_entry(&self) -> Result<Option<Self::EntryHandleType>>;
255
256    /// Reads many keys in one shot.
257    ///
258    /// This is the **vectorized** counterpart to [`crate::DataStore::read`].  
259    /// It takes a slice of raw-byte keys and returns a `Vec` whose *i-th* element
260    /// is the result of looking up the *i-th* key.
261    ///
262    /// *   **Zero-copy** – each `Some(EntryHandle)` points directly into the
263    ///     shared `Arc<Mmap>`; no payload is copied.
264    /// *   **Constant-time per key** – the in-memory [`crate::storage_engine::KeyIndexer`] map is used
265    ///     for each lookup, so the complexity is *O(n)* where *n* is
266    ///     `keys.len()`.
267    /// *   **Thread-safe** – a read lock on the index is taken once for the whole
268    ///     batch, so concurrent writers are still blocked only for the same short
269    ///     critical section that a single `read` would need.
270    ///
271    /// # Returns:
272    /// - `Ok(results)`: `Vec<Option<EntryHandle>>` in key order.
273    /// - `Err(std::io::Error)`: On I/O failure.
274    async fn batch_read(&self, keys: &[&[u8]]) -> Result<Vec<Option<Self::EntryHandleType>>>;
275
276    /// Reads many keys in one shot using pre-computed hashes.
277    ///
278    /// This is a lower-level, high-performance version of [`Self::batch_read`].
279    /// It is designed for scenarios where the caller has already computed the key
280    /// hashes and wants to avoid the overhead of re-hashing. The method offers
281    /// an optional verification step to safeguard against hash collisions.
282    ///
283    /// * **Zero-copy**: Each `Some(EntryHandle)` provides a direct, zero-copy view
284    ///     into the memory-mapped file.
285    /// * **High-performance**: Bypasses the key hashing step if hashes are already
286    ///     available.
287    /// * **Thread-safe**: Acquires a single read lock for the entire batch
288    ///     operation, minimizing contention.
289    ///
290    /// # Parameters
291    /// - `prehashed_keys`: A slice of `u64` key hashes to look up.
292    /// - `non_hashed_keys`: An optional slice of the original, non-hashed keys
293    ///   corresponding to `prehashed_keys`.
294    ///     - If `Some(keys)`, the method performs a tag-based verification to ensure
295    ///       that the found entry truly belongs to the original key, preventing
296    ///       data retrieval from a hash collision. The length of this slice
297    ///       **must** match the length of `prehashed_keys`.
298    ///     - If `None`, this verification is skipped. The lookup relies solely
299    ///       on the hash, which is faster but carries a theoretical risk of
300    ///       returning incorrect data in the event of a hash collision.
301    ///
302    /// # Returns
303    /// - `Ok(results)`: A `Vec<Option<Self::EntryHandleType>>` where each element
304    ///   corresponds to the result of looking up the key at the same index.
305    /// - `Err(std::io::Error)`: On I/O failure or if the lengths of `prehashed_keys`
306    ///   and `non_hashed_keys` (when `Some`) do not match.
307    async fn batch_read_hashed_keys(
308        &self,
309        prehashed_keys: &[u64],
310        non_hashed_keys: Option<&[&[u8]]>,
311    ) -> Result<Vec<Option<Self::EntryHandleType>>>;
312
313    /// Retrieves metadata for a given key.
314    ///
315    /// This method looks up a key in the storage and returns its associated metadata.
316    ///
317    /// # Parameters:
318    /// - `key`: The **binary key** whose metadata is to be retrieved.
319    ///
320    /// # Returns:
321    /// - `Ok(Some(metadata))`: Metadata if the key exists.
322    /// - `Ok(None)`: If the key is absent.
323    /// - `Err(std::io::Error)`: On I/O failure.
324    async fn read_metadata(&self, key: &[u8]) -> Result<Option<EntryMetadata>>;
325
326    /// Counts **active** (non-deleted) key-value pairs in the storage.
327    ///
328    /// # Returns:
329    /// - `Ok(active_count)`: Total active entries.
330    /// - `Err(std::io::Error)`: On I/O failure.
331    async fn len(&self) -> Result<usize>;
332
333    /// Determines if the store is empty or has no active keys.
334    ///
335    /// # Returns:
336    /// - `Ok(bool)`: Whether or not the store has any active keys.
337    /// - `Err(std::io::Error)`: On I/O failure.
338    async fn is_empty(&self) -> Result<bool>;
339
340    /// Returns the current file size on disk (including those of deleted entries).
341    ///
342    /// # Returns:
343    /// - `Ok(bytes)`: File size in bytes.
344    /// - `Err(std::io::Error)`: On I/O failure.
345    async fn file_size(&self) -> Result<u64>;
346}