// commonware-storage 2026.4.0 — persist and retrieve data from an abstract store.
// (A rendered-documentation page header and its line-number gutter preceded this file;
// they were extraction residue, not source code, and have been removed.)
//! A shared, generic implementation of the _Any_ QMDB.
//!
//! The impl blocks in this file define shared functionality across all Any QMDB variants.

use super::operation::{update::Update, Operation};
use crate::{
    index::Unordered as UnorderedIndex,
    journal::{
        authenticated,
        contiguous::{Contiguous, Mutable, Reader},
        Error as JournalError,
    },
    merkle::{Family, Location, Proof},
    qmdb::{
        build_snapshot_from_log, delete_known_loc, operation::Operation as OperationTrait,
        update_known_loc, Error,
    },
    Context, Persistable,
};
use commonware_codec::{Codec, CodecShared};
use commonware_cryptography::Hasher;
use core::num::NonZeroU64;
use std::collections::HashMap;

/// Type alias for the authenticated journal used by [Db].
///
/// Bundles the merkle family `F`, runtime context `E`, contiguous journal `C`, and hasher `H`
/// behind a single name so the impl blocks below stay readable.
pub(crate) type AuthenticatedLog<F, E, C, H> = authenticated::Journal<F, E, C, H>;

/// Snapshot mutation needed to undo one operation while rewinding.
///
/// Undos are built while scanning forward from the rewind floor and then applied in reverse
/// order (newest removed operation first).
enum SnapshotUndo<F: Family, K> {
    /// Re-point `key`'s snapshot entry from `old_loc` (the undone update) back to `new_loc`
    /// (the active location the key referenced before that update).
    Replace {
        key: K,
        old_loc: Location<F>,
        new_loc: Location<F>,
    },
    /// Remove `key`'s snapshot entry at `old_loc`: the undone update was the operation that
    /// first made the key active within the scanned range.
    Remove {
        key: K,
        old_loc: Location<F>,
    },
    /// Re-insert `key` at `new_loc`: the undone operation was a delete of a then-active key.
    Insert {
        key: K,
        new_loc: Location<F>,
    },
}

/// An "Any" QMDB implementation generic over ordered/unordered keys and variable/fixed values.
/// Consider using one of the following specialized variants instead, which may be more ergonomic:
/// - [crate::qmdb::any::ordered::fixed::Db]
/// - [crate::qmdb::any::ordered::variable::Db]
/// - [crate::qmdb::any::unordered::fixed::Db]
/// - [crate::qmdb::any::unordered::variable::Db]
///
/// Type parameters:
/// - `F`: the merkle [Family] used for authentication.
/// - `E`: the runtime [Context].
/// - `C`: the [Contiguous] journal backing the operation log.
/// - `I`: the [UnorderedIndex] implementing the key snapshot.
/// - `H`: the [Hasher] producing digests.
/// - `U`: the update type; held only as a marker at the struct level (see `_update`).
pub struct Db<
    F: Family,
    E: Context,
    C: Contiguous<Item: CodecShared>,
    I: UnorderedIndex<Value = Location<F>>,
    H: Hasher,
    U: Send + Sync,
> {
    /// A (pruned) log of all operations in order of their application. The index of each
    /// operation in the log is called its _location_, which is a stable identifier.
    ///
    /// # Invariants
    ///
    /// - The log is never pruned beyond the inactivity floor.
    /// - There is always at least one commit operation in the log.
    pub(crate) log: AuthenticatedLog<F, E, C, H>,

    /// A location before which all operations are "inactive" (that is, operations before this point
    /// are over keys that have been updated by some operation at or after this point).
    pub(crate) inactivity_floor_loc: Location<F>,

    /// The location of the last commit operation.
    pub(crate) last_commit_loc: Location<F>,

    /// A snapshot of all currently active operations in the form of a map from each key to the
    /// location in the log containing its most recent update.
    ///
    /// # Invariant
    ///
    /// - Only references `Operation::Update`s.
    pub(crate) snapshot: I,

    /// The number of active keys in the snapshot.
    pub(crate) active_keys: usize,

    /// Marker for the update type parameter.
    pub(crate) _update: core::marker::PhantomData<U>,
}

// Shared read-only functionality.
impl<F, E, U, C, I, H> Db<F, E, C, I, H, U>
where
    F: Family,
    E: Context,
    U: Update,
    C: Contiguous<Item = Operation<F, U>>,
    I: UnorderedIndex<Value = Location<F>>,
    H: Hasher,
    Operation<F, U>: Codec,
{
    /// Return the inactivity floor location. This is the location before which all operations are
    /// known to be inactive. Operations before this point can be safely pruned.
    pub const fn inactivity_floor_loc(&self) -> Location<F> {
        self.inactivity_floor_loc
    }

    /// Whether the snapshot currently has no active keys.
    pub const fn is_empty(&self) -> bool {
        self.active_keys == 0
    }

    /// Get the metadata associated with the last commit.
    ///
    /// # Panics
    ///
    /// Panics if the operation at `last_commit_loc` is not a `CommitFloor`, which would violate
    /// the struct invariant that `last_commit_loc` always points at a commit operation.
    pub async fn get_metadata(&self) -> Result<Option<U::Value>, crate::qmdb::Error<F>> {
        match self.log.reader().await.read(*self.last_commit_loc).await? {
            Operation::CommitFloor(metadata, _) => Ok(metadata),
            _ => unreachable!("last commit is not a CommitFloor operation"),
        }
    }

    /// Return the root digest of the authenticated log.
    pub fn root(&self) -> H::Digest {
        self.log.root()
    }

    /// Get the value of `key` in the db, or None if it has no value.
    pub async fn get(&self, key: &U::Key) -> Result<Option<U::Value>, crate::qmdb::Error<F>> {
        // Collect to avoid holding a borrow across await points (rust-lang/rust#100013).
        let locs: Vec<Location<F>> = self.snapshot.get(key).copied().collect();
        let reader = self.log.reader().await;
        // The index may yield several candidate locations for this key, so each one is read
        // and its stored key compared before a value is returned.
        for loc in locs {
            let op = reader.read(*loc).await?;
            // Struct invariant: the snapshot only references `Operation::Update`s.
            let Operation::Update(data) = op else {
                panic!("location does not reference update operation. loc={loc}");
            };
            if data.key() == key {
                return Ok(Some(data.value().clone()));
            }
        }
        Ok(None)
    }

    /// Return [start, end) where `start` and `end - 1` are the Locations of the oldest and newest
    /// retained operations respectively.
    pub async fn bounds(&self) -> std::ops::Range<Location<F>> {
        let bounds = self.log.reader().await.bounds();
        Location::new(bounds.start)..Location::new(bounds.end)
    }

    /// Return the pinned Merkle nodes for a lower operation boundary of `loc`.
    ///
    /// # Errors
    ///
    /// - Returns [`crate::merkle::Error::LocationOverflow`] if `loc` is not a valid location.
    /// - Returns [`crate::merkle::Error::ElementPruned`] if any required node is missing.
    pub async fn pinned_nodes_at(
        &self,
        loc: Location<F>,
    ) -> Result<Vec<H::Digest>, crate::qmdb::Error<F>> {
        if !loc.is_valid() {
            return Err(crate::merkle::Error::LocationOverflow(loc).into());
        }
        // Fetch every node the family requires to be pinned for `loc`, failing if any of them
        // has already been pruned from the tree.
        let futs: Vec<_> = F::nodes_to_pin(loc)
            .map(|p| async move {
                self.log
                    .merkle
                    .get_node(p)
                    .await?
                    .ok_or(crate::merkle::Error::ElementPruned(p).into())
            })
            .collect();
        futures::future::try_join_all(futs).await
    }
}

// Functionality requiring Mutable journal.
impl<F, E, U, C, I, H> Db<F, E, C, I, H, U>
where
    F: Family,
    E: Context,
    U: Update,
    C: Mutable<Item = Operation<F, U>>,
    I: UnorderedIndex<Value = Location<F>>,
    H: Hasher,
    Operation<F, U>: Codec,
{
    /// Prunes historical operations prior to `prune_loc`. This does not affect the db's root or
    /// snapshot.
    ///
    /// # Errors
    ///
    /// - Returns [crate::qmdb::Error::PruneBeyondMinRequired] if `prune_loc` > inactivity floor.
    /// - Returns [`crate::merkle::Error::LocationOverflow`] if `prune_loc` > [`crate::merkle::Family::MAX_LEAVES`].
    pub async fn prune(&mut self, prune_loc: Location<F>) -> Result<(), crate::qmdb::Error<F>> {
        // Pruning past the inactivity floor would discard operations that may still be
        // referenced by the snapshot.
        if prune_loc > self.inactivity_floor_loc {
            return Err(crate::qmdb::Error::PruneBeyondMinRequired(
                prune_loc,
                self.inactivity_floor_loc,
            ));
        }

        self.log.prune(prune_loc).await?;

        Ok(())
    }

    /// Generate a proof of up to `max_ops` operations starting at `start_loc`, relative to the
    /// log's state when it contained `historical_size` operations. Returns the proof together
    /// with the proven operations.
    pub async fn historical_proof(
        &self,
        historical_size: Location<F>,
        start_loc: Location<F>,
        max_ops: NonZeroU64,
    ) -> Result<(Proof<F, H::Digest>, Vec<Operation<F, U>>), crate::qmdb::Error<F>> {
        self.log
            .historical_proof(historical_size, start_loc, max_ops)
            .await
            .map_err(Into::into)
    }

    /// Generate a proof of up to `max_ops` operations starting at `loc` against the log's
    /// current size. Returns the proof together with the proven operations.
    pub async fn proof(
        &self,
        loc: Location<F>,
        max_ops: NonZeroU64,
    ) -> Result<(Proof<F, H::Digest>, Vec<Operation<F, U>>), crate::qmdb::Error<F>> {
        self.historical_proof(self.log.size().await, loc, max_ops)
            .await
    }

    /// Rewind the database to `size` operations, where `size` is the location of the next append.
    ///
    /// This rewinds both the authenticated log and the in-memory snapshot, then restores metadata
    /// (`last_commit_loc`, `inactivity_floor_loc`, `active_keys`) for the new tip commit.
    ///
    /// # Errors
    ///
    /// Returns an error when:
    /// - `size` is not a valid rewind target
    /// - the target's required logical range is not fully retained (for example, the target
    ///   inactivity floor is pruned)
    /// - `size - 1` is not a commit operation
    ///
    /// Any error from this method is fatal for this handle. Rewind may mutate journal state before
    /// all in-memory structures are rebuilt. Callers must drop this database handle after any `Err`
    /// from `rewind` and reopen from storage.
    ///
    /// Returns the list of locations restored to active state in the snapshot.
    ///
    /// A successful rewind is not restart-stable until a subsequent [`Db::commit`] or
    /// [`Db::sync`].
    pub async fn rewind(&mut self, size: Location<F>) -> Result<Vec<Location<F>>, Error<F>> {
        let rewind_size = *size;
        let current_size = *self.last_commit_loc + 1;

        // Rewinding to the current size is a no-op.
        if rewind_size == current_size {
            return Ok(Vec::new());
        }
        // Cannot rewind to an empty log (an invariant requires at least one commit) or forward.
        if rewind_size == 0 || rewind_size > current_size {
            return Err(Error::Journal(JournalError::InvalidRewind(rewind_size)));
        }

        // Read everything needed for rewind before mutating storage.
        let (rewind_floor, undos, active_keys_delta) = {
            let reader = self.log.reader().await;
            let bounds = reader.bounds();
            let rewind_last_loc = Location::new(rewind_size - 1);
            // The new tip must still be retained in the journal.
            if rewind_size <= bounds.start {
                return Err(Error::<F>::Journal(JournalError::ItemPruned(
                    *rewind_last_loc,
                )));
            }
            // The new tip must be a commit carrying an inactivity floor.
            let rewind_last_op = reader.read(*rewind_last_loc).await?;
            let Some(rewind_floor) = rewind_last_op.has_floor() else {
                return Err(Error::UnexpectedData(rewind_last_loc));
            };
            // The scan below needs every operation from the target floor onward.
            if *rewind_floor < bounds.start {
                return Err(Error::<F>::Journal(JournalError::ItemPruned(*rewind_floor)));
            }

            let mut undos = Vec::with_capacity((current_size - rewind_size) as usize);
            let mut active_keys_delta = 0isize;
            // Latest state seen so far for each key during the scan: `Some(loc)` if its most
            // recent operation was an update at `loc`, `None` if it was a delete. Keys whose
            // last active operation precedes the target floor would be inactive at the target
            // (per the floor's definition), so scanning from `rewind_floor` suffices.
            let mut prior_state_by_key: HashMap<U::Key, Option<Location<F>>> = HashMap::new();

            // Reconstruct key state once in a single pass from the rewind floor.
            for loc in *rewind_floor..current_size {
                let op = reader.read(loc).await?;
                let op_loc = Location::new(loc);
                match op {
                    // Commits never touch the snapshot.
                    Operation::CommitFloor(_, _) => {}
                    Operation::Update(update) => {
                        let key = update.key().clone();
                        let previous_loc = prior_state_by_key.get(&key).copied().flatten();

                        // Only operations being removed (at or past the new size) need undos.
                        if loc >= rewind_size {
                            if let Some(previous_loc) = previous_loc {
                                // The update superseded an earlier active location.
                                undos.push(SnapshotUndo::Replace {
                                    key: key.clone(),
                                    old_loc: op_loc,
                                    new_loc: previous_loc,
                                });
                            } else {
                                // The update activated the key; undoing it deactivates it.
                                active_keys_delta -= 1;
                                undos.push(SnapshotUndo::Remove {
                                    key: key.clone(),
                                    old_loc: op_loc,
                                });
                            }
                        }

                        prior_state_by_key.insert(key, Some(op_loc));
                    }
                    Operation::Delete(key) => {
                        let previous_loc = prior_state_by_key.get(&key).copied().flatten();

                        if loc >= rewind_size {
                            if let Some(previous_loc) = previous_loc {
                                // Undoing a delete of an active key reactivates it at its
                                // previous location. A delete of an already-inactive key
                                // needs no undo.
                                active_keys_delta += 1;
                                undos.push(SnapshotUndo::Insert {
                                    key: key.clone(),
                                    new_loc: previous_loc,
                                });
                            }
                        }

                        prior_state_by_key.insert(key, None);
                    }
                }
            }

            // Undo operations must run from newest to oldest removed operation.
            undos.reverse();

            (rewind_floor, undos, active_keys_delta)
        };

        // Journal rewind happens before in-memory undo application. If any later step fails, this
        // handle may be internally diverged and must be dropped by the caller. This step is not
        // restart-stable until a later commit/sync boundary.
        self.log.rewind(rewind_size).await?;

        let mut restored_locs = Vec::new();
        for undo in undos {
            match undo {
                SnapshotUndo::Replace {
                    key,
                    old_loc,
                    new_loc,
                } => {
                    // Only report locations that survive the rewind; intermediate locations
                    // within the removed suffix are themselves being rewound away.
                    if new_loc < rewind_size {
                        restored_locs.push(new_loc);
                    }
                    update_known_loc(&mut self.snapshot, &key, old_loc, new_loc);
                }
                SnapshotUndo::Remove { key, old_loc } => {
                    delete_known_loc(&mut self.snapshot, &key, old_loc)
                }
                SnapshotUndo::Insert { key, new_loc } => {
                    if new_loc < rewind_size {
                        restored_locs.push(new_loc);
                    }
                    self.snapshot.insert(&key, new_loc);
                }
            }
        }

        // NOTE(review): `checked_add_signed` also returns `None` on overflow when the delta is
        // positive (undone deletes), not just on underflow as the message suggests — confirm
        // the wording is intended.
        self.active_keys = self
            .active_keys
            .checked_add_signed(active_keys_delta)
            .ok_or(Error::DataCorrupted(
                "active_keys underflow while rewinding",
            ))?;
        self.last_commit_loc = Location::new(rewind_size - 1);
        self.inactivity_floor_loc = rewind_floor;

        Ok(restored_locs)
    }
}

// Functionality requiring Mutable + Persistable journal.
impl<F, E, U, C, I, H> Db<F, E, C, I, H, U>
where
    F: Family,
    E: Context,
    U: Update,
    C: Mutable<Item = Operation<F, U>> + Persistable<Error = JournalError>,
    I: UnorderedIndex<Value = Location<F>>,
    H: Hasher,
    Operation<F, U>: Codec,
{
    /// Returns a [Db] initialized from `log`, using `callback` to report snapshot
    /// building events.
    ///
    /// # Panics
    ///
    /// Panics if the log is empty or the last operation is not a commit floor operation.
    pub async fn init_from_log<Cb>(
        mut index: I,
        log: AuthenticatedLog<F, E, C, H>,
        known_inactivity_floor: Option<Location<F>>,
        mut callback: Cb,
    ) -> Result<Self, crate::qmdb::Error<F>>
    where
        Cb: FnMut(bool, Option<Location<F>>),
    {
        let reader = log.reader().await;

        // The tip of the log must be a commit; recover its location and inactivity floor.
        let tip = reader
            .bounds()
            .end
            .checked_sub(1)
            .expect("commit should exist");
        let tip_op = reader.read(tip).await?;
        let floor = tip_op.has_floor().expect("should be a commit");

        // If the last-known inactivity floor is behind the current floor, then invoke the
        // callback once per skipped location to report the inactive bits.
        if let Some(known) = known_inactivity_floor {
            for _ in *known..*floor {
                callback(false, None);
            }
        }

        // Replay active operations into the index to rebuild the snapshot.
        let key_count = build_snapshot_from_log(floor, &reader, &mut index, callback).await?;

        // Release the reader's borrow of `log` before moving `log` into `Self`.
        drop(reader);

        Ok(Self {
            log,
            inactivity_floor_loc: floor,
            snapshot: index,
            last_commit_loc: Location::new(tip),
            active_keys: key_count,
            _update: core::marker::PhantomData,
        })
    }

    /// Sync all database state to disk.
    pub async fn sync(&self) -> Result<(), crate::qmdb::Error<F>> {
        self.log.sync().await.map_err(|e| e.into())
    }

    /// Durably commit the journal state published by prior [`Db::apply_batch`]
    /// calls.
    pub async fn commit(&self) -> Result<(), crate::qmdb::Error<F>> {
        self.log.commit().await.map_err(|e| e.into())
    }

    /// Destroy the db, removing all data from disk.
    pub async fn destroy(self) -> Result<(), crate::qmdb::Error<F>> {
        self.log.destroy().await.map_err(|e| e.into())
    }
}

impl<F, E, U, C, I, H> Persistable for Db<F, E, C, I, H, U>
where
    F: Family,
    E: Context,
    U: Update,
    C: Mutable<Item = Operation<F, U>> + Persistable<Error = JournalError>,
    I: UnorderedIndex<Value = Location<F>>,
    H: Hasher,
    Operation<F, U>: Codec,
{
    type Error = crate::qmdb::Error<F>;

    /// Forwards to the inherent [`Db::commit`].
    async fn commit(&self) -> Result<(), crate::qmdb::Error<F>> {
        Self::commit(self).await
    }

    /// Forwards to the inherent [`Db::sync`].
    async fn sync(&self) -> Result<(), crate::qmdb::Error<F>> {
        Self::sync(self).await
    }

    /// Forwards to the inherent [`Db::destroy`].
    async fn destroy(self) -> Result<(), crate::qmdb::Error<F>> {
        // Call the inherent method explicitly rather than `self.destroy()`: the method-call
        // form silently relies on inherent-over-trait resolution inside the trait's own
        // `destroy`, which reads like accidental recursion. The explicit path also matches
        // the `commit`/`sync` forwarding style above.
        Self::destroy(self).await
    }
}