//! Writer traits for the storage engine
//! (`simd_r_drive/storage_engine/traits/writer.rs`).
//!
//! Defines the synchronous [`DataStoreWriter`] and asynchronous
//! [`AsyncDataStoreWriter`] interfaces for appending, renaming, copying,
//! transferring, and logically deleting entries.
use crate::storage_engine::DataStore;
use std::io::{Read, Result};
3
4pub trait DataStoreWriter {
5    /// Writes an entry using a streaming `Read` source (e.g., file, network).
6    ///
7    /// This method is designed for **large entries** that may exceed available RAM,
8    /// allowing them to be written in chunks without loading the full payload into memory.
9    ///
10    /// # Parameters:
11    /// - `key`: The **binary key** for the entry.
12    /// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
13    ///
14    /// # Returns:
15    /// - `Ok(offset)`: The file offset where the entry was written.
16    /// - `Err(std::io::Error)`: If a write or I/O operation fails.
17    ///
18    /// # Notes:
19    /// - Internally, this method delegates to `write_stream_with_key_hash`, computing
20    ///   the key hash first.
21    /// - If the entry is **small enough to fit in memory**, consider using `write()`
22    ///   or `batch_write()` instead if you don't want to stream the data in.
23    ///
24    /// # Streaming Behavior:
25    /// - The `reader` is **read incrementally in 64KB chunks** (`WRITE_STREAM_BUFFER_SIZE`).
26    /// - Data is immediately **written to disk** as it is read.
27    /// - **A checksum is computed incrementally** during the write.
28    /// - Metadata is appended **after** the full entry is written.
29    fn write_stream<R: Read>(&self, key: &[u8], reader: &mut R) -> Result<u64>;
30
31    /// Writes an entry using a **precomputed key hash** and a streaming `Read` source.
32    ///
33    /// This is a **low-level** method that operates like `write_stream`, but requires
34    /// the key to be hashed beforehand. It is primarily used internally to avoid
35    /// redundant hash computations when writing multiple entries.
36    ///
37    /// # Parameters:
38    /// - `key_hash`: The **precomputed hash** of the key.
39    /// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
40    ///
41    /// # Returns:
42    /// - `Ok(offset)`: The file offset where the entry was written.
43    /// - `Err(std::io::Error)`: If a write or I/O operation fails.
44    fn write_stream_with_key_hash<R: Read>(&self, key_hash: u64, reader: &mut R) -> Result<u64>;
45
46    /// Writes an entry with a given key and payload.
47    ///
48    /// This method computes the hash of the key and delegates to `write_with_key_hash()`.
49    /// It is a **high-level API** for adding new entries to the storage.
50    ///
51    /// # Parameters:
52    /// - `key`: The **binary key** associated with the entry.
53    /// - `payload`: The **data payload** to be stored.
54    ///
55    /// # Returns:
56    /// - `Ok(offset)`: The file offset where the entry was written.
57    /// - `Err(std::io::Error)`: If a write operation fails.
58    ///
59    /// # Notes:
60    /// - If you need streaming support, use `write_stream` instead.
61    /// - If multiple entries with the **same key** are written, the most recent
62    ///   entry will be retrieved when reading.
63    /// - This method **locks the file for writing** to ensure consistency.
64    /// - For writing **multiple entries at once**, use `batch_write()`.
65    fn write(&self, key: &[u8], payload: &[u8]) -> Result<u64>;
66
67    /// Writes an entry using a **precomputed key hash** and a payload.
68    ///
69    /// This method is a **low-level** alternative to `write()`, allowing direct
70    /// specification of the key hash. It is mainly used for optimized workflows
71    /// where the key hash is already known, avoiding redundant computations.
72    ///
73    /// # Parameters:
74    /// - `key_hash`: The **precomputed hash** of the key.
75    /// - `payload`: The **data payload** to be stored.
76    ///
77    /// # Returns:
78    /// - `Ok(offset)`: The file offset where the entry was written.
79    /// - `Err(std::io::Error)`: If a write operation fails.
80    ///
81    /// # Notes:
82    /// - The caller is responsible for ensuring that `key_hash` is correctly computed.
83    /// - This method **locks the file for writing** to maintain consistency.
84    /// - If writing **multiple entries**, consider using `batch_write_with_key_hashes()`.
85    fn write_with_key_hash(&self, key_hash: u64, payload: &[u8]) -> Result<u64>;
86
87    /// Writes multiple key-value pairs as a **single transaction**.
88    ///
89    /// This method computes the hashes of the provided keys and delegates to
90    /// `batch_write_with_key_hashes()`, ensuring all writes occur in a single
91    /// locked operation for efficiency.
92    ///
93    /// # Parameters:
94    /// - `entries`: A **slice of key-value pairs**, where:
95    ///   - `key`: The **binary key** for the entry.
96    ///   - `payload`: The **data payload** to be stored.
97    ///
98    /// # Returns:
99    /// - `Ok(final_offset)`: The file offset after all writes.
100    /// - `Err(std::io::Error)`: If a write operation fails.
101    ///
102    /// # Notes:
103    /// - This method improves efficiency by **minimizing file lock contention**.
104    /// - If a large number of entries are written, **batching reduces overhead**.
105    /// - If the key hashes are already computed, use `batch_write_with_key_hashes()`.
106    fn batch_write(&self, entries: &[(&[u8], &[u8])]) -> Result<u64>;
107
108    /// Writes multiple key-value pairs as a **single transaction**, using precomputed key hashes.
109    ///
110    /// This method efficiently appends multiple entries in a **batch operation**,
111    /// reducing lock contention and improving performance for bulk writes.
112    ///
113    /// # Parameters:
114    /// - `prehashed_keys`: A **vector of precomputed key hashes and payloads**, where:
115    ///   - `key_hash`: The **precomputed hash** of the key.
116    ///   - `payload`: The **data payload** to be stored.
117    ///
118    /// # Returns:
119    /// - `Ok(final_offset)`: The file offset after all writes.
120    /// - `Err(std::io::Error)`: If a write operation fails.
121    ///
122    /// # Notes:
123    /// - **File locking is performed only once** for all writes, improving efficiency.
124    /// - If an entry's `payload` is empty, an error is returned.
125    /// - This method uses **SIMD-accelerated memory copy (`simd_copy`)** to optimize write
126    ///   performance.
127    /// - **Metadata (checksums, offsets) is written after payloads** to ensure data integrity.
128    /// - After writing, the memory-mapped file (`mmap`) is **remapped** to reflect updates.
129    ///
130    /// # Efficiency Considerations:
131    /// - **Faster than multiple `write()` calls**, since it reduces lock contention.
132    /// - Suitable for **bulk insertions** where key hashes are known beforehand.
133    /// - If keys are available but not hashed, use `batch_write()` instead.
134    fn batch_write_with_key_hashes(
135        &self,
136        prehashed_keys: Vec<(u64, &[u8])>,
137        allow_null_bytes: bool,
138    ) -> Result<u64>;
139
140    /// Renames an existing entry by copying it under a new key and marking the old key as deleted.
141    ///
142    /// This function:
143    /// - Reads the existing entry associated with `old_key`.
144    /// - Writes the same data under `new_key`.
145    /// - Deletes the `old_key` by appending a tombstone entry.
146    ///
147    /// # Parameters:
148    /// - `old_key`: The **original key** of the entry to be renamed.
149    /// - `new_key`: The **new key** under which the entry will be stored.
150    ///
151    /// # Returns:
152    /// - `Ok(new_offset)`: The file offset where the new entry was written.
153    /// - `Err(std::io::Error)`: If the old key is not found or if a write operation fails.
154    ///
155    /// # Notes:
156    /// - This operation **does not modify** the original entry but instead appends a new copy.
157    /// - The old key is **logically deleted** via an append-only tombstone.
158    /// - Attempting to rename a key to itself will return an error.
159    fn rename(&self, old_key: &[u8], new_key: &[u8]) -> Result<u64>;
160
161    /// Copies an entry to a **different storage container**.
162    ///
163    /// This function:
164    /// - Reads the entry associated with `key` in the current storage.
165    /// - Writes it to the `target` storage.
166    ///
167    /// # Parameters:
168    /// - `key`: The **key** of the entry to be copied.
169    /// - `target`: The **destination storage** where the entry should be copied.
170    ///
171    /// # Returns:
172    /// - `Ok(target_offset)`: The file offset where the copied entry was written in the target storage.
173    /// - `Err(std::io::Error)`: If the key is not found, if the write operation fails,  
174    ///   or if attempting to copy to the same storage.
175    ///
176    /// # Notes:
177    /// - Copying within the **same** storage is unnecessary; use `rename` instead.
178    /// - This operation does **not** delete the original entry.
179    fn copy(&self, key: &[u8], target: &DataStore) -> Result<u64>;
180
181    /// Moves an entry from the current storage to a **different storage container**.
182    ///
183    /// This function:
184    /// - Copies the entry from the current storage to `target`.
185    /// - Marks the original entry as deleted.
186    ///
187    /// # Parameters:
188    /// - `key`: The **key** of the entry to be moved.
189    /// - `target`: The **destination storage** where the entry should be moved.
190    ///
191    /// # Returns:
192    /// - `Ok(target_offset)`: The file offset where the entry was written in the target storage.
193    /// - `Err(std::io::Error)`: If the key is not found, or if the copy/delete operation fails.
194    ///
195    /// # Notes:
196    /// - Moving an entry within the **same** storage is unnecessary; use `rename` instead.
197    /// - The original entry is **logically deleted** by appending a tombstone, maintaining
198    ///   the append-only structure.
199    fn transfer(&self, key: &[u8], target: &DataStore) -> Result<u64>;
200
201    /// Logically deletes an entry by its key.
202    ///
203    /// The storage engine is **append-only**, so entries are not removed directly.
204    /// Instead, this method appends a **tombstone marker** to logically delete the key.
205    ///
206    /// This operation first **verifies that the key exists** before appending a tombstone.
207    /// If the key is not found, no data is written to the file, and the operation
208    /// succeeds without changing the store's state.
209    ///
210    /// # Parameters
211    /// - `key`: The **binary key** to mark as deleted.
212    ///
213    /// # Returns
214    /// - `Ok(tail_offset)`: The file's tail offset after the operation completes.
215    /// - `Err(std::io::Error)`: On I/O failure.
216    fn delete(&self, key: &[u8]) -> Result<u64>;
217
218    /// Deletes a batch of entries from the storage by their keys.
219    ///
220    /// This method computes the hash for each key and then calls the underlying
221    /// `batch_delete_key_hashes` method. It will only write deletion markers
222    /// (tombstones) for keys that currently exist in the store.
223    ///
224    /// # Parameters
225    /// - `keys`: A slice of keys to be deleted.
226    ///
227    /// # Returns
228    /// - `Ok(tail_offset)`: The new tail offset of the file after the operation.
229    /// - `Err(std::io::Error)`: On I/O failure.
230    fn batch_delete(&self, keys: &[&[u8]]) -> Result<u64>;
231
232    /// Deletes a batch of entries from the storage using pre-computed key hashes.
233    ///
234    /// This is the lowest-level batch deletion method. It checks for the existence
235    /// of each key hash in the in-memory index before writing a deletion marker.
236    /// This prevents the store from being filled with unnecessary tombstones for
237    /// keys that were never present.
238    ///
239    /// # Parameters
240    /// - `prehashed_keys`: A slice of `u64` key hashes to be deleted.
241    ///
242    /// # Returns
243    /// - `Ok(tail_offset)`: The new tail offset of the file after the operation.
244    /// - `Err(std::io::Error)`: On I/O failure.
245    fn batch_delete_key_hashes(&self, prehashed_keys: &[u64]) -> Result<u64>;
246}
247
248#[async_trait::async_trait]
249pub trait AsyncDataStoreWriter {
250    /// Writes an entry using a streaming `Read` source (e.g., file, network).
251    ///
252    /// This method is designed for **large entries** that may exceed available RAM,
253    /// allowing them to be written in chunks without loading the full payload into memory.
254    ///
255    /// # Parameters:
256    /// - `key`: The **binary key** for the entry.
257    /// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
258    ///
259    /// # Returns:
260    /// - `Ok(offset)`: The file offset where the entry was written.
261    /// - `Err(std::io::Error)`: If a write or I/O operation fails.
262    ///
263    /// # Notes:
264    /// - Internally, this method delegates to `write_stream_with_key_hash`, computing
265    ///   the key hash first.
266    /// - If the entry is **small enough to fit in memory**, consider using `write()`
267    ///   or `batch_write()` instead if you don't want to stream the data in.
268    ///
269    /// # Streaming Behavior:
270    /// - The `reader` is **read incrementally in 64KB chunks** (`WRITE_STREAM_BUFFER_SIZE`).
271    /// - Data is immediately **written to disk** as it is read.
272    /// - **A checksum is computed incrementally** during the write.
273    /// - Metadata is appended **after** the full entry is written.
274    async fn write_stream<R: Read>(&self, key: &[u8], reader: &mut R) -> Result<u64>;
275
276    /// Writes an entry using a **precomputed key hash** and a streaming `Read` source.
277    ///
278    /// This is a **low-level** method that operates like `write_stream`, but requires
279    /// the key to be hashed beforehand. It is primarily used internally to avoid
280    /// redundant hash computations when writing multiple entries.
281    ///
282    /// # Parameters:
283    /// - `key_hash`: The **precomputed hash** of the key.
284    /// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
285    ///
286    /// # Returns:
287    /// - `Ok(offset)`: The file offset where the entry was written.
288    /// - `Err(std::io::Error)`: If a write or I/O operation fails.
289    async fn write_stream_with_key_hash<R: Read>(
290        &self,
291        key_hash: u64,
292        reader: &mut R,
293    ) -> Result<u64>;
294
295    /// Writes an entry with a given key and payload.
296    ///
297    /// This method computes the hash of the key and delegates to `write_with_key_hash()`.
298    /// It is a **high-level API** for adding new entries to the storage.
299    ///
300    /// # Parameters:
301    /// - `key`: The **binary key** associated with the entry.
302    /// - `payload`: The **data payload** to be stored.
303    ///
304    /// # Returns:
305    /// - `Ok(offset)`: The file offset where the entry was written.
306    /// - `Err(std::io::Error)`: If a write operation fails.
307    ///
308    /// # Notes:
309    /// - If you need streaming support, use `write_stream` instead.
310    /// - If multiple entries with the **same key** are written, the most recent
311    ///   entry will be retrieved when reading.
312    /// - This method **locks the file for writing** to ensure consistency.
313    /// - For writing **multiple entries at once**, use `batch_write()`.
314    async fn write(&self, key: &[u8], payload: &[u8]) -> Result<u64>;
315
316    /// Writes an entry using a **precomputed key hash** and a payload.
317    ///
318    /// This method is a **low-level** alternative to `write()`, allowing direct
319    /// specification of the key hash. It is mainly used for optimized workflows
320    /// where the key hash is already known, avoiding redundant computations.
321    ///
322    /// # Parameters:
323    /// - `key_hash`: The **precomputed hash** of the key.
324    /// - `payload`: The **data payload** to be stored.
325    ///
326    /// # Returns:
327    /// - `Ok(offset)`: The file offset where the entry was written.
328    /// - `Err(std::io::Error)`: If a write operation fails.
329    ///
330    /// # Notes:
331    /// - The caller is responsible for ensuring that `key_hash` is correctly computed.
332    /// - This method **locks the file for writing** to maintain consistency.
333    /// - If writing **multiple entries**, consider using `batch_write_with_key_hashes()`.
334    async fn write_with_key_hash(&self, key_hash: u64, payload: &[u8]) -> Result<u64>;
335
336    /// Writes multiple key-value pairs as a **single transaction**.
337    ///
338    /// This method computes the hashes of the provided keys and delegates to
339    /// `batch_write_with_key_hashes()`, ensuring all writes occur in a single
340    /// locked operation for efficiency.
341    ///
342    /// # Parameters:
343    /// - `entries`: A **slice of key-value pairs**, where:
344    ///   - `key`: The **binary key** for the entry.
345    ///   - `payload`: The **data payload** to be stored.
346    ///
347    /// # Returns:
348    /// - `Ok(final_offset)`: The file offset after all writes.
349    /// - `Err(std::io::Error)`: If a write operation fails.
350    ///
351    /// # Notes:
352    /// - This method improves efficiency by **minimizing file lock contention**.
353    /// - If a large number of entries are written, **batching reduces overhead**.
354    /// - If the key hashes are already computed, use `batch_write_with_key_hashes()`.
355    async fn batch_write(&self, entries: &[(&[u8], &[u8])]) -> Result<u64>;
356
357    /// Writes multiple key-value pairs as a **single transaction**, using precomputed key hashes.
358    ///
359    /// This method efficiently appends multiple entries in a **batch operation**,
360    /// reducing lock contention and improving performance for bulk writes.
361    ///
362    /// # Parameters:
363    /// - `prehashed_keys`: A **vector of precomputed key hashes and payloads**, where:
364    ///   - `key_hash`: The **precomputed hash** of the key.
365    ///   - `payload`: The **data payload** to be stored.
366    ///
367    /// # Returns:
368    /// - `Ok(final_offset)`: The file offset after all writes.
369    /// - `Err(std::io::Error)`: If a write operation fails.
370    ///
371    /// # Notes:
372    /// - **File locking is performed only once** for all writes, improving efficiency.
373    /// - If an entry's `payload` is empty, an error is returned.
374    /// - This method uses **SIMD-accelerated memory copy (`simd_copy`)** to optimize write
375    ///   performance.
376    /// - **Metadata (checksums, offsets) is written after payloads** to ensure data integrity.
377    /// - After writing, the memory-mapped file (`mmap`) is **remapped** to reflect updates.
378    ///
379    /// # Efficiency Considerations:
380    /// - **Faster than multiple `write()` calls**, since it reduces lock contention.
381    /// - Suitable for **bulk insertions** where key hashes are known beforehand.
382    /// - If keys are available but not hashed, use `batch_write()` instead.
383    async fn batch_write_with_key_hashes(
384        &self,
385        prehashed_keys: Vec<(u64, &[u8])>,
386        allow_null_bytes: bool,
387    ) -> Result<u64>;
388
389    /// Renames an existing entry by copying it under a new key and marking the old key as deleted.
390    ///
391    /// This function:
392    /// - Reads the existing entry associated with `old_key`.
393    /// - Writes the same data under `new_key`.
394    /// - Deletes the `old_key` by appending a tombstone entry.
395    ///
396    /// # Parameters:
397    /// - `old_key`: The **original key** of the entry to be renamed.
398    /// - `new_key`: The **new key** under which the entry will be stored.
399    ///
400    /// # Returns:
401    /// - `Ok(new_offset)`: The file offset where the new entry was written.
402    /// - `Err(std::io::Error)`: If the old key is not found or if a write operation fails.
403    ///
404    /// # Notes:
405    /// - This operation **does not modify** the original entry but instead appends a new copy.
406    /// - The old key is **logically deleted** via an append-only tombstone.
407    /// - Attempting to rename a key to itself will return an error.
408    async fn rename(&self, old_key: &[u8], new_key: &[u8]) -> Result<u64>;
409
410    /// Copies an entry to a **different storage container**.
411    ///
412    /// This function:
413    /// - Reads the entry associated with `key` in the current storage.
414    /// - Writes it to the `target` storage.
415    ///
416    /// # Parameters:
417    /// - `key`: The **key** of the entry to be copied.
418    /// - `target`: The **destination storage** where the entry should be copied.
419    ///
420    /// # Returns:
421    /// - `Ok(target_offset)`: The file offset where the copied entry was written in the target storage.
422    /// - `Err(std::io::Error)`: If the key is not found, if the write operation fails,  
423    ///   or if attempting to copy to the same storage.
424    ///
425    /// # Notes:
426    /// - Copying within the **same** storage is unnecessary; use `rename` instead.
427    /// - This operation does **not** delete the original entry.
428    async fn copy(&self, key: &[u8], target: &DataStore) -> Result<u64>;
429
430    /// Moves an entry from the current storage to a **different storage container**.
431    ///
432    /// This function:
433    /// - Copies the entry from the current storage to `target`.
434    /// - Marks the original entry as deleted.
435    ///
436    /// # Parameters:
437    /// - `key`: The **key** of the entry to be moved.
438    /// - `target`: The **destination storage** where the entry should be moved.
439    ///
440    /// # Returns:
441    /// - `Ok(target_offset)`: The file offset where the entry was written in the target storage.
442    /// - `Err(std::io::Error)`: If the key is not found, or if the copy/delete operation fails.
443    ///
444    /// # Notes:
445    /// - Moving an entry within the **same** storage is unnecessary; use `rename` instead.
446    /// - The original entry is **logically deleted** by appending a tombstone, maintaining
447    ///   the append-only structure.
448    async fn transfer(&self, key: &[u8], target: &DataStore) -> Result<u64>;
449
450    /// Logically deletes an entry by its key.
451    ///
452    /// The storage engine is **append-only**, so entries are not removed directly.
453    /// Instead, this method appends a **tombstone marker** to logically delete the key.
454    ///
455    /// This operation first **verifies that the key exists** before appending a tombstone.
456    /// If the key is not found, no data is written to the file, and the operation
457    /// succeeds without changing the store's state.
458    ///
459    /// # Parameters
460    /// - `key`: The **binary key** to mark as deleted.
461    ///
462    /// # Returns
463    /// - `Ok(tail_offset)`: The file's tail offset after the operation completes.
464    /// - `Err(std::io::Error)`: On I/O failure.
465    async fn delete(&self, key: &[u8]) -> Result<u64>;
466
467    /// Deletes a batch of entries from the storage by their keys.
468    ///
469    /// This method computes the hash for each key and then calls the underlying
470    /// `batch_delete_key_hashes` method. It will only write deletion markers
471    /// (tombstones) for keys that currently exist in the store.
472    ///
473    /// # Parameters
474    /// - `keys`: A slice of keys to be deleted.
475    ///
476    /// # Returns
477    /// - `Ok(tail_offset)`: The new tail offset of the file after the operation.
478    /// - `Err(std::io::Error)`: On I/O failure.
479    async fn batch_delete(&self, keys: &[&[u8]]) -> Result<u64>;
480
481    /// Deletes a batch of entries from the storage using pre-computed key hashes.
482    ///
483    /// This is the lowest-level batch deletion method. It checks for the existence
484    /// of each key hash in the in-memory index before writing a deletion marker.
485    /// This prevents the store from being filled with unnecessary tombstones for
486    /// keys that were never present.
487    ///
488    /// # Parameters
489    /// - `prehashed_keys`: A slice of `u64` key hashes to be deleted.
490    ///
491    /// # Returns
492    /// - `Ok(tail_offset)`: The new tail offset of the file after the operation.
493    /// - `Err(std::io::Error)`: On I/O failure.
494    async fn batch_delete_key_hashes(&self, prehashed_keys: &[u64]) -> Result<u64>;
495}