1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
use simd_r_drive_entry_handle::EntryMetadata;
use std::io::Result;
/// Synchronous, read-only view over a key-value data store.
///
/// Every method borrows the store immutably and reports failures as
/// [`std::io::Error`]. Implementors pick the handle type returned by lookups
/// through [`Self::EntryHandleType`].
pub trait DataStoreReader {
    /// Handle type handed back by the lookup methods of this trait.
    type EntryHandleType;

    /// Reports whether `key` is currently present in the store.
    ///
    /// The lookup is **constant-time** and is served from the in-memory
    /// [`crate::storage_engine::KeyIndexer`] map. A key counts as present only
    /// when it is in the index **and has not been marked as deleted**.
    ///
    /// # Parameters
    /// - `key`: The **binary key** to look for.
    ///
    /// # Returns
    /// - `Ok(true)`: The key exists and is active.
    /// - `Ok(false)`: The key is absent or has been deleted.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn exists(&self, key: &[u8]) -> Result<bool>;

    /// Reports whether an already-hashed key is present in the store.
    ///
    /// Behaves like [`Self::exists`] but skips the hashing step, which makes
    /// it the faster choice when the caller already knows the hash. Since the
    /// original key is not supplied, no tag verification takes place: the
    /// answer depends solely on whether the hash is present in the index.
    ///
    /// # Parameters
    /// - `prehashed_key`: The **pre-computed hash** of the key to look for.
    ///
    /// # Returns
    /// - `Ok(true)`: The key hash is present in the index.
    /// - `Ok(false)`: The key hash is absent.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn exists_with_key_hash(&self, prehashed_key: u64) -> Result<bool>;

    /// Fetches the most recent value stored under `key`.
    ///
    /// The key is resolved through a fast in-memory index, and the latest
    /// matching entry is returned when one is found.
    ///
    /// # Parameters
    /// - `key`: The **binary key** whose latest value is wanted.
    ///
    /// # Returns
    /// - `Ok(Some(EntryHandle))`: Handle to the entry, if found.
    /// - `Ok(None)`: The key does not exist or has been deleted.
    /// - `Err(std::io::Error)`: On I/O failure.
    ///
    /// # Notes
    /// - The returned `EntryHandle` gives zero-copy access to the stored data.
    fn read(&self, key: &[u8]) -> Result<Option<Self::EntryHandleType>>;

    /// Fetches the most recent value stored under a pre-computed key hash.
    ///
    /// Low-level alternative to [`Self::read`]: the entry is located from its
    /// hash alone, so the hashing step is bypassed.
    ///
    /// # Warning
    /// Tag verification is **not** performed, because the original key is not
    /// available. In the rare event of a hash collision this method could
    /// therefore return the entry that belongs to a different key.
    ///
    /// # Parameters
    /// - `prehashed_key`: The **pre-computed hash** of the key to retrieve.
    ///
    /// # Returns
    /// - `Ok(Some(EntryHandle))`: Handle to the entry, if found.
    /// - `Ok(None)`: The key hash does not exist or has been deleted.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn read_with_key_hash(&self, prehashed_key: u64) -> Result<Option<Self::EntryHandleType>>;

    /// Fetches the entry that was written to the file last.
    ///
    /// # Returns
    /// - `Ok(Some(EntryHandle))`: Handle to the last entry, if there is one.
    /// - `Ok(None)`: The file is empty.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn read_last_entry(&self) -> Result<Option<Self::EntryHandleType>>;

    /// Looks up many keys in a single call.
    ///
    /// This is the **vectorized** counterpart to [`Self::read`]. Given a slice
    /// of raw-byte keys it returns a `Vec` whose *i-th* element is the outcome
    /// of looking up the *i-th* key.
    ///
    /// * **Zero-copy** – every `Some(EntryHandle)` points straight into the
    ///   shared `Arc<Mmap>`; no payload is copied.
    /// * **Constant-time per key** – each lookup goes through the in-memory
    ///   [`crate::storage_engine::KeyIndexer`] map, so the overall complexity
    ///   is *O(n)* with *n* being `keys.len()`.
    /// * **Thread-safe** – one read lock on the index covers the whole batch,
    ///   so concurrent writers are blocked only for the same short critical
    ///   section that a single `read` would need.
    ///
    /// # Parameters
    /// - `keys`: The **binary keys** to look up.
    ///
    /// # Returns
    /// - `Ok(results)`: `Vec<Option<EntryHandle>>` in key order.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn batch_read(&self, keys: &[&[u8]]) -> Result<Vec<Option<Self::EntryHandleType>>>;

    /// Looks up many keys in a single call, starting from pre-computed hashes.
    ///
    /// Lower-level, high-performance variant of [`Self::batch_read`] intended
    /// for callers that have already hashed their keys and want to avoid
    /// hashing them again. Hash collisions can optionally be guarded against
    /// with a verification step.
    ///
    /// * **Zero-copy**: every `Some(EntryHandle)` is a direct, zero-copy view
    ///   into the memory-mapped file.
    /// * **High-performance**: the key hashing step is bypassed when hashes
    ///   are already at hand.
    /// * **Thread-safe**: a single read lock is acquired for the entire batch,
    ///   which keeps contention low.
    ///
    /// # Parameters
    /// - `prehashed_keys`: Slice of `u64` key hashes to look up.
    /// - `non_hashed_keys`: Optional slice holding the original, non-hashed
    ///   keys that correspond to `prehashed_keys`.
    ///   - With `Some(keys)`, a tag-based verification confirms that each
    ///     entry found really belongs to the original key, so data is never
    ///     served from a hash collision. The slice length **must** equal the
    ///     length of `prehashed_keys`.
    ///   - With `None`, verification is skipped. The lookup then relies on the
    ///     hash alone, which is faster but carries a theoretical risk of
    ///     returning incorrect data if a hash collision occurs.
    ///
    /// # Returns
    /// - `Ok(results)`: A `Vec<Option<Self::EntryHandleType>>` in which each
    ///   element is the lookup result for the key at the same index.
    /// - `Err(std::io::Error)`: On I/O failure, or when `non_hashed_keys` is
    ///   `Some` and its length differs from that of `prehashed_keys`.
    fn batch_read_hashed_keys(
        &self,
        prehashed_keys: &[u64],
        non_hashed_keys: Option<&[&[u8]]>,
    ) -> Result<Vec<Option<Self::EntryHandleType>>>;

    /// Fetches the metadata recorded for `key`.
    ///
    /// The key is looked up in the storage and its associated metadata is
    /// returned.
    ///
    /// # Parameters
    /// - `key`: The **binary key** whose metadata is wanted.
    ///
    /// # Returns
    /// - `Ok(Some(metadata))`: The metadata, if the key exists.
    /// - `Ok(None)`: The key is absent.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn read_metadata(&self, key: &[u8]) -> Result<Option<EntryMetadata>>;

    /// Counts the **active** (non-deleted) key-value pairs in the storage.
    ///
    /// # Returns
    /// - `Ok(active_count)`: Total number of active entries.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn len(&self) -> Result<usize>;

    /// Tells whether the store is empty, i.e. holds no active keys.
    ///
    /// # Returns
    /// - `Ok(true)`: The store has no active keys.
    /// - `Ok(false)`: The store has at least one active key.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn is_empty(&self) -> Result<bool>;

    /// Returns the current size of the file on disk; the space taken up by
    /// deleted entries is included.
    ///
    /// # Returns
    /// - `Ok(bytes)`: File size in bytes.
    /// - `Err(std::io::Error)`: On I/O failure.
    fn file_size(&self) -> Result<u64>;
}
/// Asynchronous, read-only view over a key-value data store.
///
/// Every method borrows the store immutably and reports failures as
/// [`std::io::Error`]. Implementors pick the handle type returned by lookups
/// through [`Self::EntryHandleType`].
#[async_trait::async_trait]
pub trait AsyncDataStoreReader {
    /// Handle type handed back by the lookup methods of this trait.
    type EntryHandleType;

    /// Reports whether `key` is currently present in the store.
    ///
    /// The lookup is **constant-time** and is served from the in-memory
    /// [`crate::storage_engine::KeyIndexer`] map. A key counts as present only
    /// when it is in the index **and has not been marked as deleted**.
    ///
    /// # Parameters
    /// - `key`: The **binary key** to look for.
    ///
    /// # Returns
    /// - `Ok(true)`: The key exists and is active.
    /// - `Ok(false)`: The key is absent or has been deleted.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn exists(&self, key: &[u8]) -> Result<bool>;

    /// Reports whether an already-hashed key is present in the store.
    ///
    /// Behaves like [`Self::exists`] but skips the hashing step, which makes
    /// it the faster choice when the caller already knows the hash. Since the
    /// original key is not supplied, no tag verification takes place: the
    /// answer depends solely on whether the hash is present in the index.
    ///
    /// # Parameters
    /// - `prehashed_key`: The **pre-computed hash** of the key to look for.
    ///
    /// # Returns
    /// - `Ok(true)`: The key hash is present in the index.
    /// - `Ok(false)`: The key hash is absent.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn exists_with_key_hash(&self, prehashed_key: u64) -> Result<bool>;

    /// Fetches the most recent value stored under `key`.
    ///
    /// The key is resolved through a fast in-memory index, and the latest
    /// matching entry is returned when one is found.
    ///
    /// # Parameters
    /// - `key`: The **binary key** whose latest value is wanted.
    ///
    /// # Returns
    /// - `Ok(Some(EntryHandle))`: Handle to the entry, if found.
    /// - `Ok(None)`: The key does not exist or has been deleted.
    /// - `Err(std::io::Error)`: On I/O failure.
    ///
    /// # Notes
    /// - The returned `EntryHandle` gives zero-copy access to the stored data.
    async fn read(&self, key: &[u8]) -> Result<Option<Self::EntryHandleType>>;

    /// Fetches the most recent value stored under a pre-computed key hash.
    ///
    /// Low-level alternative to [`Self::read`]: the entry is located from its
    /// hash alone, so the hashing step is bypassed.
    ///
    /// # Warning
    /// Tag verification is **not** performed, because the original key is not
    /// available. In the rare event of a hash collision this method could
    /// therefore return the entry that belongs to a different key.
    ///
    /// # Parameters
    /// - `prehashed_key`: The **pre-computed hash** of the key to retrieve.
    ///
    /// # Returns
    /// - `Ok(Some(EntryHandle))`: Handle to the entry, if found.
    /// - `Ok(None)`: The key hash does not exist or has been deleted.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn read_with_key_hash(
        &self,
        prehashed_key: u64,
    ) -> Result<Option<Self::EntryHandleType>>;

    /// Fetches the entry that was written to the file last.
    ///
    /// # Returns
    /// - `Ok(Some(EntryHandle))`: Handle to the last entry, if there is one.
    /// - `Ok(None)`: The file is empty.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn read_last_entry(&self) -> Result<Option<Self::EntryHandleType>>;

    /// Looks up many keys in a single call.
    ///
    /// This is the **vectorized** counterpart to [`Self::read`]. Given a slice
    /// of raw-byte keys it returns a `Vec` whose *i-th* element is the outcome
    /// of looking up the *i-th* key.
    ///
    /// * **Zero-copy** – every `Some(EntryHandle)` points straight into the
    ///   shared `Arc<Mmap>`; no payload is copied.
    /// * **Constant-time per key** – each lookup goes through the in-memory
    ///   [`crate::storage_engine::KeyIndexer`] map, so the overall complexity
    ///   is *O(n)* with *n* being `keys.len()`.
    /// * **Thread-safe** – one read lock on the index covers the whole batch,
    ///   so concurrent writers are blocked only for the same short critical
    ///   section that a single `read` would need.
    ///
    /// # Parameters
    /// - `keys`: The **binary keys** to look up.
    ///
    /// # Returns
    /// - `Ok(results)`: `Vec<Option<EntryHandle>>` in key order.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn batch_read(&self, keys: &[&[u8]]) -> Result<Vec<Option<Self::EntryHandleType>>>;

    /// Looks up many keys in a single call, starting from pre-computed hashes.
    ///
    /// Lower-level, high-performance variant of [`Self::batch_read`] intended
    /// for callers that have already hashed their keys and want to avoid
    /// hashing them again. Hash collisions can optionally be guarded against
    /// with a verification step.
    ///
    /// * **Zero-copy**: every `Some(EntryHandle)` is a direct, zero-copy view
    ///   into the memory-mapped file.
    /// * **High-performance**: the key hashing step is bypassed when hashes
    ///   are already at hand.
    /// * **Thread-safe**: a single read lock is acquired for the entire batch,
    ///   which keeps contention low.
    ///
    /// # Parameters
    /// - `prehashed_keys`: Slice of `u64` key hashes to look up.
    /// - `non_hashed_keys`: Optional slice holding the original, non-hashed
    ///   keys that correspond to `prehashed_keys`.
    ///   - With `Some(keys)`, a tag-based verification confirms that each
    ///     entry found really belongs to the original key, so data is never
    ///     served from a hash collision. The slice length **must** equal the
    ///     length of `prehashed_keys`.
    ///   - With `None`, verification is skipped. The lookup then relies on the
    ///     hash alone, which is faster but carries a theoretical risk of
    ///     returning incorrect data if a hash collision occurs.
    ///
    /// # Returns
    /// - `Ok(results)`: A `Vec<Option<Self::EntryHandleType>>` in which each
    ///   element is the lookup result for the key at the same index.
    /// - `Err(std::io::Error)`: On I/O failure, or when `non_hashed_keys` is
    ///   `Some` and its length differs from that of `prehashed_keys`.
    async fn batch_read_hashed_keys(
        &self,
        prehashed_keys: &[u64],
        non_hashed_keys: Option<&[&[u8]]>,
    ) -> Result<Vec<Option<Self::EntryHandleType>>>;

    /// Fetches the metadata recorded for `key`.
    ///
    /// The key is looked up in the storage and its associated metadata is
    /// returned.
    ///
    /// # Parameters
    /// - `key`: The **binary key** whose metadata is wanted.
    ///
    /// # Returns
    /// - `Ok(Some(metadata))`: The metadata, if the key exists.
    /// - `Ok(None)`: The key is absent.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn read_metadata(&self, key: &[u8]) -> Result<Option<EntryMetadata>>;

    /// Counts the **active** (non-deleted) key-value pairs in the storage.
    ///
    /// # Returns
    /// - `Ok(active_count)`: Total number of active entries.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn len(&self) -> Result<usize>;

    /// Tells whether the store is empty, i.e. holds no active keys.
    ///
    /// # Returns
    /// - `Ok(true)`: The store has no active keys.
    /// - `Ok(false)`: The store has at least one active key.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn is_empty(&self) -> Result<bool>;

    /// Returns the current size of the file on disk; the space taken up by
    /// deleted entries is included.
    ///
    /// # Returns
    /// - `Ok(bytes)`: File size in bytes.
    /// - `Err(std::io::Error)`: On I/O failure.
    async fn file_size(&self) -> Result<u64>;
}