// simd_r_drive/storage_engine/traits/reader.rs
1use simd_r_drive_entry_handle::EntryMetadata;
2use std::io::Result;
3
4pub trait DataStoreReader {
5 type EntryHandleType;
6
7 /// Checks whether a key currently exists in the store.
8 ///
9 /// This is a **constant‑time** lookup against the in‑memory
10 /// [`crate::storage_engine::KeyIndexer`] map.
11 /// A key is considered to *exist* only if it is present **and not marked
12 /// as deleted**.
13 ///
14 /// # Parameters
15 /// - `key`: The **binary key** to check.
16 ///
17 /// # Returns
18 /// - `Ok(true)`: Key exists and is active.
19 /// - `Ok(false)`: Key is absent or has been deleted.
20 /// - `Err(std::io::Error)`: On I/O failure.
21 fn exists(&self, key: &[u8]) -> Result<bool>;
22
23 /// Checks whether a key with a pre-computed hash exists in the store.
24 ///
25 /// This is a more direct version of [`Self::exists`] that skips the hashing step,
26 /// making it faster if the hash is already known. Because the original key is not
27 /// provided, this check does not perform tag verification and relies solely on the
28 /// hash's presence in the index.
29 ///
30 /// # Parameters
31 /// - `prehashed_key`: The **pre-computed hash** of the key to check.
32 ///
33 /// # Returns
34 /// - `Ok(true)` if the key hash exists in the index.
35 /// - `Ok(false)` if the key hash is absent.
36 /// - `Err(std::io::Error)`: On I/O failure.
37 fn exists_with_key_hash(&self, prehashed_key: u64) -> Result<bool>;
38
39 /// Retrieves the most recent value associated with a given key.
40 ///
41 /// This method **efficiently looks up a key** using a fast in-memory index,
42 /// and returns the latest corresponding value if found.
43 ///
44 /// # Parameters:
45 /// - `key`: The **binary key** whose latest value is to be retrieved.
46 ///
47 /// # Returns:
48 /// - `Ok(Some(EntryHandle))`: Handle to the entry if found.
49 /// - `Ok(None)`: If the key does not exist or is deleted.
50 /// - `Err(std::io::Error)`: On I/O failure.
51 ///
52 /// # Notes:
53 /// - The returned `EntryHandle` provides zero-copy access to the stored data.
54 fn read(&self, key: &[u8]) -> Result<Option<Self::EntryHandleType>>;
55
56 /// Retrieves the most recent value associated with a pre-computed key hash.
57 ///
58 /// This is a low-level alternative to [`Self::read`] that looks up an entry using
59 /// only its hash, bypassing the hashing step.
60 ///
61 /// # Warning
62 /// This method does **not** perform tag verification, as the original key is not
63 /// provided. This means that in the rare event of a hash collision, this function
64 /// could return the entry for a different key.
65 ///
66 /// # Parameters
67 /// - `prehashed_key`: The **pre-computed hash** of the key to retrieve.
68 ///
69 /// # Returns
70 /// - `Ok(Some(EntryHandle))`: Handle to the entry if found.
71 /// - `Ok(None)`: If the key hash does not exist or is deleted.
72 /// - `Err(std::io::Error)`: On I/O failure.
73 fn read_with_key_hash(&self, prehashed_key: u64) -> Result<Option<Self::EntryHandleType>>;
74
75 /// Retrieves the last entry written to the file.
76 ///
77 /// # Returns:
78 /// - `Ok(Some(EntryHandle))`: Handle to the last entry, if any.
79 /// - `Ok(None)`: If the file is empty.
80 /// - `Err(std::io::Error)`: On I/O failure.
81 fn read_last_entry(&self) -> Result<Option<Self::EntryHandleType>>;
82
83 /// Reads many keys in one shot.
84 ///
85 /// This is the **vectorized** counterpart to [`crate::DataStore::read`].
86 /// It takes a slice of raw-byte keys and returns a `Vec` whose *i-th* element
87 /// is the result of looking up the *i-th* key.
88 ///
89 /// * **Zero-copy** – each `Some(EntryHandle)` points directly into the
90 /// shared `Arc<Mmap>`; no payload is copied.
91 /// * **Constant-time per key** – the in-memory [`crate::storage_engine::KeyIndexer`] map is used
92 /// for each lookup, so the complexity is *O(n)* where *n* is
93 /// `keys.len()`.
94 /// * **Thread-safe** – a read lock on the index is taken once for the whole
95 /// batch, so concurrent writers are still blocked only for the same short
96 /// critical section that a single `read` would need.
97 ///
98 /// # Returns:
99 /// - `Ok(results)`: `Vec<Option<EntryHandle>>` in key order.
100 /// - `Err(std::io::Error)`: On I/O failure.
101 fn batch_read(&self, keys: &[&[u8]]) -> Result<Vec<Option<Self::EntryHandleType>>>;
102
103 /// Reads many keys in one shot using pre-computed hashes.
104 ///
105 /// This is a lower-level, high-performance version of [`Self::batch_read`].
106 /// It is designed for scenarios where the caller has already computed the key
107 /// hashes and wants to avoid the overhead of re-hashing. The method offers
108 /// an optional verification step to safeguard against hash collisions.
109 ///
110 /// * **Zero-copy**: Each `Some(EntryHandle)` provides a direct, zero-copy view
111 /// into the memory-mapped file.
112 /// * **High-performance**: Bypasses the key hashing step if hashes are already
113 /// available.
114 /// * **Thread-safe**: Acquires a single read lock for the entire batch
115 /// operation, minimizing contention.
116 ///
117 /// # Parameters
118 /// - `prehashed_keys`: A slice of `u64` key hashes to look up.
119 /// - `non_hashed_keys`: An optional slice of the original, non-hashed keys
120 /// corresponding to `prehashed_keys`.
121 /// - If `Some(keys)`, the method performs a tag-based verification to ensure
122 /// that the found entry truly belongs to the original key, preventing
123 /// data retrieval from a hash collision. The length of this slice
124 /// **must** match the length of `prehashed_keys`.
125 /// - If `None`, this verification is skipped. The lookup relies solely
126 /// on the hash, which is faster but carries a theoretical risk of
127 /// returning incorrect data in the event of a hash collision.
128 ///
129 /// # Returns
130 /// - `Ok(results)`: A `Vec<Option<Self::EntryHandleType>>` where each element
131 /// corresponds to the result of looking up the key at the same index.
132 /// - `Err(std::io::Error)`: On I/O failure or if the lengths of `prehashed_keys`
133 /// and `non_hashed_keys` (when `Some`) do not match.
134 fn batch_read_hashed_keys(
135 &self,
136 prehashed_keys: &[u64],
137 non_hashed_keys: Option<&[&[u8]]>,
138 ) -> Result<Vec<Option<Self::EntryHandleType>>>;
139
140 /// Retrieves metadata for a given key.
141 ///
142 /// This method looks up a key in the storage and returns its associated metadata.
143 ///
144 /// # Parameters:
145 /// - `key`: The **binary key** whose metadata is to be retrieved.
146 ///
147 /// # Returns:
148 /// - `Ok(Some(metadata))`: Metadata if the key exists.
149 /// - `Ok(None)`: If the key is absent.
150 /// - `Err(std::io::Error)`: On I/O failure.
151 fn read_metadata(&self, key: &[u8]) -> Result<Option<EntryMetadata>>;
152
153 /// Counts **active** (non-deleted) key-value pairs in the storage.
154 ///
155 /// # Returns:
156 /// - `Ok(active_count)`: Total active entries.
157 /// - `Err(std::io::Error)`: On I/O failure.
158 fn len(&self) -> Result<usize>;
159
160 /// Determines if the store is empty or has no active keys.
161 ///
162 /// # Returns:
163 /// - `Ok(bool)`: Whether or not the store has any active keys.
164 /// - `Err(std::io::Error)`: On I/O failure.
165 fn is_empty(&self) -> Result<bool>;
166
167 /// Returns the current file size on disk (including those of deleted entries).
168 ///
169 /// # Returns:
170 /// - `Ok(bytes)`: File size in bytes.
171 /// - `Err(std::io::Error)`: On I/O failure.
172 fn file_size(&self) -> Result<u64>;
173}
174
175#[async_trait::async_trait]
176pub trait AsyncDataStoreReader {
177 type EntryHandleType;
178
179 /// Checks whether a key currently exists in the store.
180 ///
181 /// This is a **constant‑time** lookup against the in‑memory
182 /// [`crate::storage_engine::KeyIndexer`] map.
183 /// A key is considered to *exist* only if it is present **and not marked
184 /// as deleted**.
185 ///
186 /// # Parameters
187 /// - `key`: The **binary key** to check.
188 ///
189 /// # Returns
190 /// - `Ok(true)`: Key exists and is active.
191 /// - `Ok(false)`: Key is absent or has been deleted.
192 /// - `Err(std::io::Error)`: On I/O failure.
193 async fn exists(&self, key: &[u8]) -> Result<bool>;
194
195 /// Checks whether a key with a pre-computed hash exists in the store.
196 ///
197 /// This is a more direct version of [`Self::exists`] that skips the hashing step,
198 /// making it faster if the hash is already known. Because the original key is not
199 /// provided, this check does not perform tag verification and relies solely on the
200 /// hash's presence in the index.
201 ///
202 /// # Parameters
203 /// - `prehashed_key`: The **pre-computed hash** of the key to check.
204 ///
205 /// # Returns
206 /// - `Ok(true)` if the key hash exists in the index.
207 /// - `Ok(false)` if the key hash is absent.
208 /// - `Err(std::io::Error)`: On I/O failure.
209 async fn exists_with_key_hash(&self, prehashed_key: u64) -> Result<bool>;
210
211 /// Retrieves the most recent value associated with a given key.
212 ///
213 /// This method **efficiently looks up a key** using a fast in-memory index,
214 /// and returns the latest corresponding value if found.
215 ///
216 /// # Parameters:
217 /// - `key`: The **binary key** whose latest value is to be retrieved.
218 ///
219 /// # Returns:
220 /// - `Ok(Some(EntryHandle))`: Handle to the entry if found.
221 /// - `Ok(None)`: If the key does not exist or is deleted.
222 /// - `Err(std::io::Error)`: On I/O failure.
223 ///
224 /// # Notes:
225 /// - The returned `EntryHandle` provides zero-copy access to the stored data.
226 async fn read(&self, key: &[u8]) -> Result<Option<Self::EntryHandleType>>;
227
228 /// Retrieves the most recent value associated with a pre-computed key hash.
229 ///
230 /// This is a low-level alternative to [`Self::read`] that looks up an entry using
231 /// only its hash, bypassing the hashing step.
232 ///
233 /// # Warning
234 /// This method does **not** perform tag verification, as the original key is not
235 /// provided. This means that in the rare event of a hash collision, this function
236 /// could return the entry for a different key.
237 ///
238 /// # Parameters
239 /// - `prehashed_key`: The **pre-computed hash** of the key to retrieve.
240 ///
241 /// # Returns
242 /// - `Ok(Some(EntryHandle))`: Handle to the entry if found.
243 /// - `Ok(None)`: If the key hash does not exist or is deleted.
244 /// - `Err(std::io::Error)`: On I/O failure.
245 async fn read_with_key_hash(&self, prehashed_key: u64)
246 -> Result<Option<Self::EntryHandleType>>;
247
248 /// Retrieves the last entry written to the file.
249 ///
250 /// # Returns:
251 /// - `Ok(Some(EntryHandle))`: Handle to the last entry, if any.
252 /// - `Ok(None)`: If the file is empty.
253 /// - `Err(std::io::Error)`: On I/O failure.
254 async fn read_last_entry(&self) -> Result<Option<Self::EntryHandleType>>;
255
256 /// Reads many keys in one shot.
257 ///
258 /// This is the **vectorized** counterpart to [`crate::DataStore::read`].
259 /// It takes a slice of raw-byte keys and returns a `Vec` whose *i-th* element
260 /// is the result of looking up the *i-th* key.
261 ///
262 /// * **Zero-copy** – each `Some(EntryHandle)` points directly into the
263 /// shared `Arc<Mmap>`; no payload is copied.
264 /// * **Constant-time per key** – the in-memory [`crate::storage_engine::KeyIndexer`] map is used
265 /// for each lookup, so the complexity is *O(n)* where *n* is
266 /// `keys.len()`.
267 /// * **Thread-safe** – a read lock on the index is taken once for the whole
268 /// batch, so concurrent writers are still blocked only for the same short
269 /// critical section that a single `read` would need.
270 ///
271 /// # Returns:
272 /// - `Ok(results)`: `Vec<Option<EntryHandle>>` in key order.
273 /// - `Err(std::io::Error)`: On I/O failure.
274 async fn batch_read(&self, keys: &[&[u8]]) -> Result<Vec<Option<Self::EntryHandleType>>>;
275
276 /// Reads many keys in one shot using pre-computed hashes.
277 ///
278 /// This is a lower-level, high-performance version of [`Self::batch_read`].
279 /// It is designed for scenarios where the caller has already computed the key
280 /// hashes and wants to avoid the overhead of re-hashing. The method offers
281 /// an optional verification step to safeguard against hash collisions.
282 ///
283 /// * **Zero-copy**: Each `Some(EntryHandle)` provides a direct, zero-copy view
284 /// into the memory-mapped file.
285 /// * **High-performance**: Bypasses the key hashing step if hashes are already
286 /// available.
287 /// * **Thread-safe**: Acquires a single read lock for the entire batch
288 /// operation, minimizing contention.
289 ///
290 /// # Parameters
291 /// - `prehashed_keys`: A slice of `u64` key hashes to look up.
292 /// - `non_hashed_keys`: An optional slice of the original, non-hashed keys
293 /// corresponding to `prehashed_keys`.
294 /// - If `Some(keys)`, the method performs a tag-based verification to ensure
295 /// that the found entry truly belongs to the original key, preventing
296 /// data retrieval from a hash collision. The length of this slice
297 /// **must** match the length of `prehashed_keys`.
298 /// - If `None`, this verification is skipped. The lookup relies solely
299 /// on the hash, which is faster but carries a theoretical risk of
300 /// returning incorrect data in the event of a hash collision.
301 ///
302 /// # Returns
303 /// - `Ok(results)`: A `Vec<Option<Self::EntryHandleType>>` where each element
304 /// corresponds to the result of looking up the key at the same index.
305 /// - `Err(std::io::Error)`: On I/O failure or if the lengths of `prehashed_keys`
306 /// and `non_hashed_keys` (when `Some`) do not match.
307 async fn batch_read_hashed_keys(
308 &self,
309 prehashed_keys: &[u64],
310 non_hashed_keys: Option<&[&[u8]]>,
311 ) -> Result<Vec<Option<Self::EntryHandleType>>>;
312
313 /// Retrieves metadata for a given key.
314 ///
315 /// This method looks up a key in the storage and returns its associated metadata.
316 ///
317 /// # Parameters:
318 /// - `key`: The **binary key** whose metadata is to be retrieved.
319 ///
320 /// # Returns:
321 /// - `Ok(Some(metadata))`: Metadata if the key exists.
322 /// - `Ok(None)`: If the key is absent.
323 /// - `Err(std::io::Error)`: On I/O failure.
324 async fn read_metadata(&self, key: &[u8]) -> Result<Option<EntryMetadata>>;
325
326 /// Counts **active** (non-deleted) key-value pairs in the storage.
327 ///
328 /// # Returns:
329 /// - `Ok(active_count)`: Total active entries.
330 /// - `Err(std::io::Error)`: On I/O failure.
331 async fn len(&self) -> Result<usize>;
332
333 /// Determines if the store is empty or has no active keys.
334 ///
335 /// # Returns:
336 /// - `Ok(bool)`: Whether or not the store has any active keys.
337 /// - `Err(std::io::Error)`: On I/O failure.
338 async fn is_empty(&self) -> Result<bool>;
339
340 /// Returns the current file size on disk (including those of deleted entries).
341 ///
342 /// # Returns:
343 /// - `Ok(bytes)`: File size in bytes.
344 /// - `Err(std::io::Error)`: On I/O failure.
345 async fn file_size(&self) -> Result<u64>;
346}