// simd_r_drive/storage_engine/traits/writer.rs
use crate::storage_engine::DataStore;
use std::io::{Read, Result};
3
4pub trait DataStoreWriter {
5 /// Writes an entry using a streaming `Read` source (e.g., file, network).
6 ///
7 /// This method is designed for **large entries** that may exceed available RAM,
8 /// allowing them to be written in chunks without loading the full payload into memory.
9 ///
10 /// # Parameters:
11 /// - `key`: The **binary key** for the entry.
12 /// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
13 ///
14 /// # Returns:
15 /// - `Ok(offset)`: The file offset where the entry was written.
16 /// - `Err(std::io::Error)`: If a write or I/O operation fails.
17 ///
18 /// # Notes:
19 /// - Internally, this method delegates to `write_stream_with_key_hash`, computing
20 /// the key hash first.
21 /// - If the entry is **small enough to fit in memory**, consider using `write()`
22 /// or `batch_write()` instead if you don't want to stream the data in.
23 ///
24 /// # Streaming Behavior:
25 /// - The `reader` is **read incrementally in 64KB chunks** (`WRITE_STREAM_BUFFER_SIZE`).
26 /// - Data is immediately **written to disk** as it is read.
27 /// - **A checksum is computed incrementally** during the write.
28 /// - Metadata is appended **after** the full entry is written.
29 fn write_stream<R: Read>(&self, key: &[u8], reader: &mut R) -> Result<u64>;
30
31 /// Writes an entry using a **precomputed key hash** and a streaming `Read` source.
32 ///
33 /// This is a **low-level** method that operates like `write_stream`, but requires
34 /// the key to be hashed beforehand. It is primarily used internally to avoid
35 /// redundant hash computations when writing multiple entries.
36 ///
37 /// # Parameters:
38 /// - `key_hash`: The **precomputed hash** of the key.
39 /// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
40 ///
41 /// # Returns:
42 /// - `Ok(offset)`: The file offset where the entry was written.
43 /// - `Err(std::io::Error)`: If a write or I/O operation fails.
44 fn write_stream_with_key_hash<R: Read>(&self, key_hash: u64, reader: &mut R) -> Result<u64>;
45
46 /// Writes an entry with a given key and payload.
47 ///
48 /// This method computes the hash of the key and delegates to `write_with_key_hash()`.
49 /// It is a **high-level API** for adding new entries to the storage.
50 ///
51 /// # Parameters:
52 /// - `key`: The **binary key** associated with the entry.
53 /// - `payload`: The **data payload** to be stored.
54 ///
55 /// # Returns:
56 /// - `Ok(offset)`: The file offset where the entry was written.
57 /// - `Err(std::io::Error)`: If a write operation fails.
58 ///
59 /// # Notes:
60 /// - If you need streaming support, use `write_stream` instead.
61 /// - If multiple entries with the **same key** are written, the most recent
62 /// entry will be retrieved when reading.
63 /// - This method **locks the file for writing** to ensure consistency.
64 /// - For writing **multiple entries at once**, use `batch_write()`.
65 fn write(&self, key: &[u8], payload: &[u8]) -> Result<u64>;
66
67 /// Writes an entry using a **precomputed key hash** and a payload.
68 ///
69 /// This method is a **low-level** alternative to `write()`, allowing direct
70 /// specification of the key hash. It is mainly used for optimized workflows
71 /// where the key hash is already known, avoiding redundant computations.
72 ///
73 /// # Parameters:
74 /// - `key_hash`: The **precomputed hash** of the key.
75 /// - `payload`: The **data payload** to be stored.
76 ///
77 /// # Returns:
78 /// - `Ok(offset)`: The file offset where the entry was written.
79 /// - `Err(std::io::Error)`: If a write operation fails.
80 ///
81 /// # Notes:
82 /// - The caller is responsible for ensuring that `key_hash` is correctly computed.
83 /// - This method **locks the file for writing** to maintain consistency.
84 /// - If writing **multiple entries**, consider using `batch_write_with_key_hashes()`.
85 fn write_with_key_hash(&self, key_hash: u64, payload: &[u8]) -> Result<u64>;
86
87 /// Writes multiple key-value pairs as a **single transaction**.
88 ///
89 /// This method computes the hashes of the provided keys and delegates to
90 /// `batch_write_with_key_hashes()`, ensuring all writes occur in a single
91 /// locked operation for efficiency.
92 ///
93 /// # Parameters:
94 /// - `entries`: A **slice of key-value pairs**, where:
95 /// - `key`: The **binary key** for the entry.
96 /// - `payload`: The **data payload** to be stored.
97 ///
98 /// # Returns:
99 /// - `Ok(final_offset)`: The file offset after all writes.
100 /// - `Err(std::io::Error)`: If a write operation fails.
101 ///
102 /// # Notes:
103 /// - This method improves efficiency by **minimizing file lock contention**.
104 /// - If a large number of entries are written, **batching reduces overhead**.
105 /// - If the key hashes are already computed, use `batch_write_with_key_hashes()`.
106 fn batch_write(&self, entries: &[(&[u8], &[u8])]) -> Result<u64>;
107
108 /// Writes multiple key-value pairs as a **single transaction**, using precomputed key hashes.
109 ///
110 /// This method efficiently appends multiple entries in a **batch operation**,
111 /// reducing lock contention and improving performance for bulk writes.
112 ///
113 /// # Parameters:
114 /// - `prehashed_keys`: A **vector of precomputed key hashes and payloads**, where:
115 /// - `key_hash`: The **precomputed hash** of the key.
116 /// - `payload`: The **data payload** to be stored.
117 ///
118 /// # Returns:
119 /// - `Ok(final_offset)`: The file offset after all writes.
120 /// - `Err(std::io::Error)`: If a write operation fails.
121 ///
122 /// # Notes:
123 /// - **File locking is performed only once** for all writes, improving efficiency.
124 /// - If an entry's `payload` is empty, an error is returned.
125 /// - This method uses **SIMD-accelerated memory copy (`simd_copy`)** to optimize write
126 /// performance.
127 /// - **Metadata (checksums, offsets) is written after payloads** to ensure data integrity.
128 /// - After writing, the memory-mapped file (`mmap`) is **remapped** to reflect updates.
129 ///
130 /// # Efficiency Considerations:
131 /// - **Faster than multiple `write()` calls**, since it reduces lock contention.
132 /// - Suitable for **bulk insertions** where key hashes are known beforehand.
133 /// - If keys are available but not hashed, use `batch_write()` instead.
134 fn batch_write_with_key_hashes(
135 &self,
136 prehashed_keys: Vec<(u64, &[u8])>,
137 allow_null_bytes: bool,
138 ) -> Result<u64>;
139
140 /// Renames an existing entry by copying it under a new key and marking the old key as deleted.
141 ///
142 /// This function:
143 /// - Reads the existing entry associated with `old_key`.
144 /// - Writes the same data under `new_key`.
145 /// - Deletes the `old_key` by appending a tombstone entry.
146 ///
147 /// # Parameters:
148 /// - `old_key`: The **original key** of the entry to be renamed.
149 /// - `new_key`: The **new key** under which the entry will be stored.
150 ///
151 /// # Returns:
152 /// - `Ok(new_offset)`: The file offset where the new entry was written.
153 /// - `Err(std::io::Error)`: If the old key is not found or if a write operation fails.
154 ///
155 /// # Notes:
156 /// - This operation **does not modify** the original entry but instead appends a new copy.
157 /// - The old key is **logically deleted** via an append-only tombstone.
158 /// - Attempting to rename a key to itself will return an error.
159 fn rename(&self, old_key: &[u8], new_key: &[u8]) -> Result<u64>;
160
161 /// Copies an entry to a **different storage container**.
162 ///
163 /// This function:
164 /// - Reads the entry associated with `key` in the current storage.
165 /// - Writes it to the `target` storage.
166 ///
167 /// # Parameters:
168 /// - `key`: The **key** of the entry to be copied.
169 /// - `target`: The **destination storage** where the entry should be copied.
170 ///
171 /// # Returns:
172 /// - `Ok(target_offset)`: The file offset where the copied entry was written in the target storage.
173 /// - `Err(std::io::Error)`: If the key is not found, if the write operation fails,
174 /// or if attempting to copy to the same storage.
175 ///
176 /// # Notes:
177 /// - Copying within the **same** storage is unnecessary; use `rename` instead.
178 /// - This operation does **not** delete the original entry.
179 fn copy(&self, key: &[u8], target: &DataStore) -> Result<u64>;
180
181 /// Moves an entry from the current storage to a **different storage container**.
182 ///
183 /// This function:
184 /// - Copies the entry from the current storage to `target`.
185 /// - Marks the original entry as deleted.
186 ///
187 /// # Parameters:
188 /// - `key`: The **key** of the entry to be moved.
189 /// - `target`: The **destination storage** where the entry should be moved.
190 ///
191 /// # Returns:
192 /// - `Ok(target_offset)`: The file offset where the entry was written in the target storage.
193 /// - `Err(std::io::Error)`: If the key is not found, or if the copy/delete operation fails.
194 ///
195 /// # Notes:
196 /// - Moving an entry within the **same** storage is unnecessary; use `rename` instead.
197 /// - The original entry is **logically deleted** by appending a tombstone, maintaining
198 /// the append-only structure.
199 fn transfer(&self, key: &[u8], target: &DataStore) -> Result<u64>;
200
201 /// Logically deletes an entry by its key.
202 ///
203 /// The storage engine is **append-only**, so entries are not removed directly.
204 /// Instead, this method appends a **tombstone marker** to logically delete the key.
205 ///
206 /// This operation first **verifies that the key exists** before appending a tombstone.
207 /// If the key is not found, no data is written to the file, and the operation
208 /// succeeds without changing the store's state.
209 ///
210 /// # Parameters
211 /// - `key`: The **binary key** to mark as deleted.
212 ///
213 /// # Returns
214 /// - `Ok(tail_offset)`: The file's tail offset after the operation completes.
215 /// - `Err(std::io::Error)`: On I/O failure.
216 fn delete(&self, key: &[u8]) -> Result<u64>;
217
218 /// Deletes a batch of entries from the storage by their keys.
219 ///
220 /// This method computes the hash for each key and then calls the underlying
221 /// `batch_delete_key_hashes` method. It will only write deletion markers
222 /// (tombstones) for keys that currently exist in the store.
223 ///
224 /// # Parameters
225 /// - `keys`: A slice of keys to be deleted.
226 ///
227 /// # Returns
228 /// - `Ok(tail_offset)`: The new tail offset of the file after the operation.
229 /// - `Err(std::io::Error)`: On I/O failure.
230 fn batch_delete(&self, keys: &[&[u8]]) -> Result<u64>;
231
232 /// Deletes a batch of entries from the storage using pre-computed key hashes.
233 ///
234 /// This is the lowest-level batch deletion method. It checks for the existence
235 /// of each key hash in the in-memory index before writing a deletion marker.
236 /// This prevents the store from being filled with unnecessary tombstones for
237 /// keys that were never present.
238 ///
239 /// # Parameters
240 /// - `prehashed_keys`: A slice of `u64` key hashes to be deleted.
241 ///
242 /// # Returns
243 /// - `Ok(tail_offset)`: The new tail offset of the file after the operation.
244 /// - `Err(std::io::Error)`: On I/O failure.
245 fn batch_delete_key_hashes(&self, prehashed_keys: &[u64]) -> Result<u64>;
246}
247
248#[async_trait::async_trait]
249pub trait AsyncDataStoreWriter {
250 /// Writes an entry using a streaming `Read` source (e.g., file, network).
251 ///
252 /// This method is designed for **large entries** that may exceed available RAM,
253 /// allowing them to be written in chunks without loading the full payload into memory.
254 ///
255 /// # Parameters:
256 /// - `key`: The **binary key** for the entry.
257 /// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
258 ///
259 /// # Returns:
260 /// - `Ok(offset)`: The file offset where the entry was written.
261 /// - `Err(std::io::Error)`: If a write or I/O operation fails.
262 ///
263 /// # Notes:
264 /// - Internally, this method delegates to `write_stream_with_key_hash`, computing
265 /// the key hash first.
266 /// - If the entry is **small enough to fit in memory**, consider using `write()`
267 /// or `batch_write()` instead if you don't want to stream the data in.
268 ///
269 /// # Streaming Behavior:
270 /// - The `reader` is **read incrementally in 64KB chunks** (`WRITE_STREAM_BUFFER_SIZE`).
271 /// - Data is immediately **written to disk** as it is read.
272 /// - **A checksum is computed incrementally** during the write.
273 /// - Metadata is appended **after** the full entry is written.
274 async fn write_stream<R: Read>(&self, key: &[u8], reader: &mut R) -> Result<u64>;
275
276 /// Writes an entry using a **precomputed key hash** and a streaming `Read` source.
277 ///
278 /// This is a **low-level** method that operates like `write_stream`, but requires
279 /// the key to be hashed beforehand. It is primarily used internally to avoid
280 /// redundant hash computations when writing multiple entries.
281 ///
282 /// # Parameters:
283 /// - `key_hash`: The **precomputed hash** of the key.
284 /// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
285 ///
286 /// # Returns:
287 /// - `Ok(offset)`: The file offset where the entry was written.
288 /// - `Err(std::io::Error)`: If a write or I/O operation fails.
289 async fn write_stream_with_key_hash<R: Read>(
290 &self,
291 key_hash: u64,
292 reader: &mut R,
293 ) -> Result<u64>;
294
295 /// Writes an entry with a given key and payload.
296 ///
297 /// This method computes the hash of the key and delegates to `write_with_key_hash()`.
298 /// It is a **high-level API** for adding new entries to the storage.
299 ///
300 /// # Parameters:
301 /// - `key`: The **binary key** associated with the entry.
302 /// - `payload`: The **data payload** to be stored.
303 ///
304 /// # Returns:
305 /// - `Ok(offset)`: The file offset where the entry was written.
306 /// - `Err(std::io::Error)`: If a write operation fails.
307 ///
308 /// # Notes:
309 /// - If you need streaming support, use `write_stream` instead.
310 /// - If multiple entries with the **same key** are written, the most recent
311 /// entry will be retrieved when reading.
312 /// - This method **locks the file for writing** to ensure consistency.
313 /// - For writing **multiple entries at once**, use `batch_write()`.
314 async fn write(&self, key: &[u8], payload: &[u8]) -> Result<u64>;
315
316 /// Writes an entry using a **precomputed key hash** and a payload.
317 ///
318 /// This method is a **low-level** alternative to `write()`, allowing direct
319 /// specification of the key hash. It is mainly used for optimized workflows
320 /// where the key hash is already known, avoiding redundant computations.
321 ///
322 /// # Parameters:
323 /// - `key_hash`: The **precomputed hash** of the key.
324 /// - `payload`: The **data payload** to be stored.
325 ///
326 /// # Returns:
327 /// - `Ok(offset)`: The file offset where the entry was written.
328 /// - `Err(std::io::Error)`: If a write operation fails.
329 ///
330 /// # Notes:
331 /// - The caller is responsible for ensuring that `key_hash` is correctly computed.
332 /// - This method **locks the file for writing** to maintain consistency.
333 /// - If writing **multiple entries**, consider using `batch_write_with_key_hashes()`.
334 async fn write_with_key_hash(&self, key_hash: u64, payload: &[u8]) -> Result<u64>;
335
336 /// Writes multiple key-value pairs as a **single transaction**.
337 ///
338 /// This method computes the hashes of the provided keys and delegates to
339 /// `batch_write_with_key_hashes()`, ensuring all writes occur in a single
340 /// locked operation for efficiency.
341 ///
342 /// # Parameters:
343 /// - `entries`: A **slice of key-value pairs**, where:
344 /// - `key`: The **binary key** for the entry.
345 /// - `payload`: The **data payload** to be stored.
346 ///
347 /// # Returns:
348 /// - `Ok(final_offset)`: The file offset after all writes.
349 /// - `Err(std::io::Error)`: If a write operation fails.
350 ///
351 /// # Notes:
352 /// - This method improves efficiency by **minimizing file lock contention**.
353 /// - If a large number of entries are written, **batching reduces overhead**.
354 /// - If the key hashes are already computed, use `batch_write_with_key_hashes()`.
355 async fn batch_write(&self, entries: &[(&[u8], &[u8])]) -> Result<u64>;
356
357 /// Writes multiple key-value pairs as a **single transaction**, using precomputed key hashes.
358 ///
359 /// This method efficiently appends multiple entries in a **batch operation**,
360 /// reducing lock contention and improving performance for bulk writes.
361 ///
362 /// # Parameters:
363 /// - `prehashed_keys`: A **vector of precomputed key hashes and payloads**, where:
364 /// - `key_hash`: The **precomputed hash** of the key.
365 /// - `payload`: The **data payload** to be stored.
366 ///
367 /// # Returns:
368 /// - `Ok(final_offset)`: The file offset after all writes.
369 /// - `Err(std::io::Error)`: If a write operation fails.
370 ///
371 /// # Notes:
372 /// - **File locking is performed only once** for all writes, improving efficiency.
373 /// - If an entry's `payload` is empty, an error is returned.
374 /// - This method uses **SIMD-accelerated memory copy (`simd_copy`)** to optimize write
375 /// performance.
376 /// - **Metadata (checksums, offsets) is written after payloads** to ensure data integrity.
377 /// - After writing, the memory-mapped file (`mmap`) is **remapped** to reflect updates.
378 ///
379 /// # Efficiency Considerations:
380 /// - **Faster than multiple `write()` calls**, since it reduces lock contention.
381 /// - Suitable for **bulk insertions** where key hashes are known beforehand.
382 /// - If keys are available but not hashed, use `batch_write()` instead.
383 async fn batch_write_with_key_hashes(
384 &self,
385 prehashed_keys: Vec<(u64, &[u8])>,
386 allow_null_bytes: bool,
387 ) -> Result<u64>;
388
389 /// Renames an existing entry by copying it under a new key and marking the old key as deleted.
390 ///
391 /// This function:
392 /// - Reads the existing entry associated with `old_key`.
393 /// - Writes the same data under `new_key`.
394 /// - Deletes the `old_key` by appending a tombstone entry.
395 ///
396 /// # Parameters:
397 /// - `old_key`: The **original key** of the entry to be renamed.
398 /// - `new_key`: The **new key** under which the entry will be stored.
399 ///
400 /// # Returns:
401 /// - `Ok(new_offset)`: The file offset where the new entry was written.
402 /// - `Err(std::io::Error)`: If the old key is not found or if a write operation fails.
403 ///
404 /// # Notes:
405 /// - This operation **does not modify** the original entry but instead appends a new copy.
406 /// - The old key is **logically deleted** via an append-only tombstone.
407 /// - Attempting to rename a key to itself will return an error.
408 async fn rename(&self, old_key: &[u8], new_key: &[u8]) -> Result<u64>;
409
410 /// Copies an entry to a **different storage container**.
411 ///
412 /// This function:
413 /// - Reads the entry associated with `key` in the current storage.
414 /// - Writes it to the `target` storage.
415 ///
416 /// # Parameters:
417 /// - `key`: The **key** of the entry to be copied.
418 /// - `target`: The **destination storage** where the entry should be copied.
419 ///
420 /// # Returns:
421 /// - `Ok(target_offset)`: The file offset where the copied entry was written in the target storage.
422 /// - `Err(std::io::Error)`: If the key is not found, if the write operation fails,
423 /// or if attempting to copy to the same storage.
424 ///
425 /// # Notes:
426 /// - Copying within the **same** storage is unnecessary; use `rename` instead.
427 /// - This operation does **not** delete the original entry.
428 async fn copy(&self, key: &[u8], target: &DataStore) -> Result<u64>;
429
430 /// Moves an entry from the current storage to a **different storage container**.
431 ///
432 /// This function:
433 /// - Copies the entry from the current storage to `target`.
434 /// - Marks the original entry as deleted.
435 ///
436 /// # Parameters:
437 /// - `key`: The **key** of the entry to be moved.
438 /// - `target`: The **destination storage** where the entry should be moved.
439 ///
440 /// # Returns:
441 /// - `Ok(target_offset)`: The file offset where the entry was written in the target storage.
442 /// - `Err(std::io::Error)`: If the key is not found, or if the copy/delete operation fails.
443 ///
444 /// # Notes:
445 /// - Moving an entry within the **same** storage is unnecessary; use `rename` instead.
446 /// - The original entry is **logically deleted** by appending a tombstone, maintaining
447 /// the append-only structure.
448 async fn transfer(&self, key: &[u8], target: &DataStore) -> Result<u64>;
449
450 /// Logically deletes an entry by its key.
451 ///
452 /// The storage engine is **append-only**, so entries are not removed directly.
453 /// Instead, this method appends a **tombstone marker** to logically delete the key.
454 ///
455 /// This operation first **verifies that the key exists** before appending a tombstone.
456 /// If the key is not found, no data is written to the file, and the operation
457 /// succeeds without changing the store's state.
458 ///
459 /// # Parameters
460 /// - `key`: The **binary key** to mark as deleted.
461 ///
462 /// # Returns
463 /// - `Ok(tail_offset)`: The file's tail offset after the operation completes.
464 /// - `Err(std::io::Error)`: On I/O failure.
465 async fn delete(&self, key: &[u8]) -> Result<u64>;
466
467 /// Deletes a batch of entries from the storage by their keys.
468 ///
469 /// This method computes the hash for each key and then calls the underlying
470 /// `batch_delete_key_hashes` method. It will only write deletion markers
471 /// (tombstones) for keys that currently exist in the store.
472 ///
473 /// # Parameters
474 /// - `keys`: A slice of keys to be deleted.
475 ///
476 /// # Returns
477 /// - `Ok(tail_offset)`: The new tail offset of the file after the operation.
478 /// - `Err(std::io::Error)`: On I/O failure.
479 async fn batch_delete(&self, keys: &[&[u8]]) -> Result<u64>;
480
481 /// Deletes a batch of entries from the storage using pre-computed key hashes.
482 ///
483 /// This is the lowest-level batch deletion method. It checks for the existence
484 /// of each key hash in the in-memory index before writing a deletion marker.
485 /// This prevents the store from being filled with unnecessary tombstones for
486 /// keys that were never present.
487 ///
488 /// # Parameters
489 /// - `prehashed_keys`: A slice of `u64` key hashes to be deleted.
490 ///
491 /// # Returns
492 /// - `Ok(tail_offset)`: The new tail offset of the file after the operation.
493 /// - `Err(std::io::Error)`: On I/O failure.
494 async fn batch_delete_key_hashes(&self, prehashed_keys: &[u64]) -> Result<u64>;
495}