1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
use crate::storage_engine::DataStore;
use std::io::{Read, Result};
pub trait DataStoreWriter {
/// Writes an entry using a streaming `Read` source (e.g., file, network).
///
/// This method is designed for **large entries** that may exceed available RAM,
/// allowing them to be written in chunks without loading the full payload into memory.
///
/// # Parameters:
/// - `key`: The **binary key** for the entry.
/// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
///
/// # Returns:
/// - `Ok(offset)`: The file offset where the entry was written.
/// - `Err(std::io::Error)`: If a write or I/O operation fails.
///
/// # Notes:
/// - Internally, this method delegates to `write_stream_with_key_hash`, computing
/// the key hash first.
/// - If the entry is **small enough to fit in memory**, consider using `write()`
/// or `batch_write()` instead if you don't want to stream the data in.
///
/// # Streaming Behavior:
/// - The `reader` is **read incrementally in 64KB chunks** (`WRITE_STREAM_BUFFER_SIZE`).
/// - Data is immediately **written to disk** as it is read.
/// - **A checksum is computed incrementally** during the write.
/// - Metadata is appended **after** the full entry is written.
fn write_stream<R: Read>(&self, key: &[u8], reader: &mut R) -> Result<u64>;
/// Writes an entry with a given key and payload.
///
/// This method computes the hash of the key and delegates to `write_with_key_hash()`.
/// It is a **high-level API** for adding new entries to the storage.
///
/// # Parameters:
/// - `key`: The **binary key** associated with the entry.
/// - `payload`: The **data payload** to be stored.
///
/// # Returns:
/// - `Ok(offset)`: The file offset where the entry was written.
/// - `Err(std::io::Error)`: If a write operation fails.
///
/// # Notes:
/// - If you need streaming support, use `write_stream` instead.
/// - If multiple entries with the **same key** are written, the most recent
/// entry will be retrieved when reading.
/// - This method **locks the file for writing** to ensure consistency.
/// - For writing **multiple entries at once**, use `batch_write()`.
fn write(&self, key: &[u8], payload: &[u8]) -> Result<u64>;
/// Writes multiple key-value pairs as a **single transaction**.
///
/// This method computes the hashes of the provided keys and delegates to
/// `batch_write_hashed_payloads()`, ensuring all writes occur in a single
/// locked operation for efficiency.
///
/// # Parameters:
/// - `entries`: A **slice of key-value pairs**, where:
/// - `key`: The **binary key** for the entry.
/// - `payload`: The **data payload** to be stored.
///
/// # Returns:
/// - `Ok(final_offset)`: The file offset after all writes.
/// - `Err(std::io::Error)`: If a write operation fails.
///
/// # Notes:
/// - This method improves efficiency by **minimizing file lock contention**.
/// - If a large number of entries are written, **batching reduces overhead**.
/// - If the key hashes are already computed, use `batch_write_hashed_payloads()`.
fn batch_write(&self, entries: &[(&[u8], &[u8])]) -> Result<u64>;
/// Renames an existing entry by copying it under a new key and marking the old key as deleted.
///
/// This function:
/// - Reads the existing entry associated with `old_key`.
/// - Writes the same data under `new_key`.
/// - Deletes the `old_key` by appending a tombstone entry.
///
/// # Parameters:
/// - `old_key`: The **original key** of the entry to be renamed.
/// - `new_key`: The **new key** under which the entry will be stored.
///
/// # Returns:
/// - `Ok(new_offset)`: The file offset where the new entry was written.
/// - `Err(std::io::Error)`: If the old key is not found or if a write operation fails.
///
/// # Notes:
/// - This operation **does not modify** the original entry but instead appends a new copy.
/// - The old key is **logically deleted** via an append-only tombstone.
/// - Attempting to rename a key to itself will return an error.
fn rename(&self, old_key: &[u8], new_key: &[u8]) -> Result<u64>;
/// Copies an entry to a **different storage container**.
///
/// This function:
/// - Reads the entry associated with `key` in the current storage.
/// - Writes it to the `target` storage.
///
/// # Parameters:
/// - `key`: The **key** of the entry to be copied.
/// - `target`: The **destination storage** where the entry should be copied.
///
/// # Returns:
/// - `Ok(target_offset)`: The file offset where the copied entry was written in the target storage.
/// - `Err(std::io::Error)`: If the key is not found, if the write operation fails,
/// or if attempting to copy to the same storage.
///
/// # Notes:
/// - Copying within the **same** storage is unnecessary; use `rename` instead.
/// - This operation does **not** delete the original entry.
fn copy(&self, key: &[u8], target: &DataStore) -> Result<u64>;
/// Moves an entry from the current storage to a **different storage container**.
///
/// This function:
/// - Copies the entry from the current storage to `target`.
/// - Marks the original entry as deleted.
///
/// # Parameters:
/// - `key`: The **key** of the entry to be moved.
/// - `target`: The **destination storage** where the entry should be moved.
///
/// # Returns:
/// - `Ok(target_offset)`: The file offset where the entry was written in the target storage.
/// - `Err(std::io::Error)`: If the key is not found, or if the copy/delete operation fails.
///
/// # Notes:
/// - Moving an entry within the **same** storage is unnecessary; use `rename` instead.
/// - The original entry is **logically deleted** by appending a tombstone, maintaining
/// the append-only structure.
fn transfer(&self, key: &[u8], target: &DataStore) -> Result<u64>;
/// Deletes a key by appending a **null byte marker**.
///
/// The storage engine is **append-only**, so keys cannot be removed directly.
/// Instead, a **null byte is appended** as a tombstone entry to mark the key as deleted.
///
/// # Parameters:
/// - `key`: The **binary key** to mark as deleted.
///
/// # Returns:
/// - The **new file offset** where the delete marker was appended.
fn delete(&self, key: &[u8]) -> Result<u64>;
}
#[async_trait::async_trait]
pub trait AsyncDataStoreWriter {
/// Writes an entry using a streaming `Read` source (e.g., file, network).
///
/// This method is designed for **large entries** that may exceed available RAM,
/// allowing them to be written in chunks without loading the full payload into memory.
///
/// # Parameters:
/// - `key`: The **binary key** for the entry.
/// - `reader`: A **streaming reader** (`Read` trait) supplying the entry's content.
///
/// # Returns:
/// - `Ok(offset)`: The file offset where the entry was written.
/// - `Err(std::io::Error)`: If a write or I/O operation fails.
///
/// # Notes:
/// - Internally, this method delegates to `write_stream_with_key_hash`, computing
/// the key hash first.
/// - If the entry is **small enough to fit in memory**, consider using `write()`
/// or `batch_write()` instead if you don't want to stream the data in.
///
/// # Streaming Behavior:
/// - The `reader` is **read incrementally in 64KB chunks** (`WRITE_STREAM_BUFFER_SIZE`).
/// - Data is immediately **written to disk** as it is read.
/// - **A checksum is computed incrementally** during the write.
/// - Metadata is appended **after** the full entry is written.
async fn write_stream<R: Read>(&self, key: &[u8], reader: &mut R) -> Result<u64>;
/// Writes an entry with a given key and payload.
///
/// This method computes the hash of the key and delegates to `write_with_key_hash()`.
/// It is a **high-level API** for adding new entries to the storage.
///
/// # Parameters:
/// - `key`: The **binary key** associated with the entry.
/// - `payload`: The **data payload** to be stored.
///
/// # Returns:
/// - `Ok(offset)`: The file offset where the entry was written.
/// - `Err(std::io::Error)`: If a write operation fails.
///
/// # Notes:
/// - If you need streaming support, use `write_stream` instead.
/// - If multiple entries with the **same key** are written, the most recent
/// entry will be retrieved when reading.
/// - This method **locks the file for writing** to ensure consistency.
/// - For writing **multiple entries at once**, use `batch_write()`.
async fn write(&self, key: &[u8], payload: &[u8]) -> Result<u64>;
/// Writes multiple key-value pairs as a **single transaction**.
///
/// This method computes the hashes of the provided keys and delegates to
/// `batch_write_hashed_payloads()`, ensuring all writes occur in a single
/// locked operation for efficiency.
///
/// # Parameters:
/// - `entries`: A **slice of key-value pairs**, where:
/// - `key`: The **binary key** for the entry.
/// - `payload`: The **data payload** to be stored.
///
/// # Returns:
/// - `Ok(final_offset)`: The file offset after all writes.
/// - `Err(std::io::Error)`: If a write operation fails.
///
/// # Notes:
/// - This method improves efficiency by **minimizing file lock contention**.
/// - If a large number of entries are written, **batching reduces overhead**.
/// - If the key hashes are already computed, use `batch_write_hashed_payloads()`.
async fn batch_write(&self, entries: &[(&[u8], &[u8])]) -> Result<u64>;
/// Renames an existing entry by copying it under a new key and marking the old key as deleted.
///
/// This function:
/// - Reads the existing entry associated with `old_key`.
/// - Writes the same data under `new_key`.
/// - Deletes the `old_key` by appending a tombstone entry.
///
/// # Parameters:
/// - `old_key`: The **original key** of the entry to be renamed.
/// - `new_key`: The **new key** under which the entry will be stored.
///
/// # Returns:
/// - `Ok(new_offset)`: The file offset where the new entry was written.
/// - `Err(std::io::Error)`: If the old key is not found or if a write operation fails.
///
/// # Notes:
/// - This operation **does not modify** the original entry but instead appends a new copy.
/// - The old key is **logically deleted** via an append-only tombstone.
/// - Attempting to rename a key to itself will return an error.
async fn rename(&self, old_key: &[u8], new_key: &[u8]) -> Result<u64>;
/// Copies an entry to a **different storage container**.
///
/// This function:
/// - Reads the entry associated with `key` in the current storage.
/// - Writes it to the `target` storage.
///
/// # Parameters:
/// - `key`: The **key** of the entry to be copied.
/// - `target`: The **destination storage** where the entry should be copied.
///
/// # Returns:
/// - `Ok(target_offset)`: The file offset where the copied entry was written in the target storage.
/// - `Err(std::io::Error)`: If the key is not found, if the write operation fails,
/// or if attempting to copy to the same storage.
///
/// # Notes:
/// - Copying within the **same** storage is unnecessary; use `rename` instead.
/// - This operation does **not** delete the original entry.
async fn copy(&self, key: &[u8], target: &DataStore) -> Result<u64>;
/// Moves an entry from the current storage to a **different storage container**.
///
/// This function:
/// - Copies the entry from the current storage to `target`.
/// - Marks the original entry as deleted.
///
/// # Parameters:
/// - `key`: The **key** of the entry to be moved.
/// - `target`: The **destination storage** where the entry should be moved.
///
/// # Returns:
/// - `Ok(target_offset)`: The file offset where the entry was written in the target storage.
/// - `Err(std::io::Error)`: If the key is not found, or if the copy/delete operation fails.
///
/// # Notes:
/// - Moving an entry within the **same** storage is unnecessary; use `rename` instead.
/// - The original entry is **logically deleted** by appending a tombstone, maintaining
/// the append-only structure.
async fn transfer(&self, key: &[u8], target: &DataStore) -> Result<u64>;
/// Deletes a key by appending a **null byte marker**.
///
/// The storage engine is **append-only**, so keys cannot be removed directly.
/// Instead, a **null byte is appended** as a tombstone entry to mark the key as deleted.
///
/// # Parameters:
/// - `key`: The **binary key** to mark as deleted.
///
/// # Returns:
/// - The **new file offset** where the delete marker was appended.
async fn delete(&self, key: &[u8]) -> Result<u64>;
}