Skip to main content

common/coordinator/
traits.rs

1use crate::StorageRead;
2use crate::storage::StorageSnapshot;
3use async_trait::async_trait;
4use std::ops::Range;
5use std::sync::Arc;
6
/// The level of durability for a write.
///
/// Durability levels form an ordered progression: `Applied < Flushed < Durable`.
/// Each level provides stronger guarantees about write persistence.
///
/// The ordering comes from the derived `PartialOrd`/`Ord` impls, which compare
/// variants by declaration order. The variants must therefore stay listed from
/// weakest to strongest; reordering them silently changes every comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Durability {
    /// Weakest level; compares less than every other variant.
    ///
    /// NOTE(review): presumably the write has been applied to the in-progress
    /// delta but not yet flushed — confirm against the coordinator.
    Applied,
    /// Middle level; greater than `Applied`, less than `Durable`.
    ///
    /// NOTE(review): presumably the delta containing the write has gone
    /// through `Flusher::flush_delta` — confirm against the coordinator.
    Flushed,
    /// Strongest level; compares greater than every other variant.
    ///
    /// NOTE(review): presumably `Flusher::flush_storage` has completed for the
    /// write — confirm against the coordinator.
    Durable,
}
17
/// A delta accumulates writes and can produce a frozen snapshot for flushing.
///
/// The write coordinator manages a pipeline of three stages, each represented
/// by a different type:
///
/// - **`Delta`** (this trait) — the mutable, in-progress batch. Writes are
///   applied here until the delta is frozen.
/// - **`Frozen`** — an immutable snapshot of the delta, sent to the
///   [`Flusher`] to be persisted to storage.
/// - **`Broadcast`** — a minimal representation of the flushed state
///   that readers need to update their read image.
///
/// NOTE(review): this trait declares no `Broadcast` associated type;
/// [`FrozenView`](Delta::FrozenView) appears to fill that role — confirm and
/// align the naming.
pub trait Delta: Sized + Send + Sync + 'static {
    /// Mutable state owned by the delta while it accumulates writes.
    /// Returned to the write coordinator on [`freeze`](Delta::freeze) so the
    /// next delta can continue where this one left off.
    type Context: Send + Sync + 'static;
    /// A single write operation applied via [`apply`](Delta::apply).
    type Write: Send + 'static;
    /// Immutable snapshot produced by [`freeze`](Delta::freeze), consumed by
    /// the [`Flusher`] to persist the batch to storage.
    type Frozen: Send + Sync + 'static;
    /// Provides an interface for reading the frozen delta. Though `Frozen` is immutable, we
    /// support specifying a distinct read type to allow implementers to provide a different
    /// representation or view of flushed state. For example, readers that only allow reading
    /// the data flushed to storage can materialize a minimal view of metadata to allow the reader
    /// to cheaply update the read image when a new view is broadcast after a flush, while at the
    /// same time allowing `Frozen` to be owned by the coordinator so the contained data doesn't
    /// need to be copied during flush.
    type FrozenView: Clone + Send + Sync + 'static;
    /// Metadata returned from [`apply`](Delta::apply), delivered to the caller
    /// through [`WriteHandle::wait`](super::WriteHandle::wait).
    type ApplyResult: Clone + Send + 'static;
    /// Provides an interface for reading the current delta. The specific read API
    /// is up to the delta implementation. It is up to the implementation to provide
    /// the APIs required for a given database, including support for snapshot isolation.
    type DeltaView: Clone + Send + Sync + 'static;

    /// Create a new delta initialized from a snapshot context.
    /// The delta takes ownership of the context while it is mutable.
    fn init(context: Self::Context) -> Self;

    /// Apply a write to the delta and return a result for the caller.
    ///
    /// # Errors
    ///
    /// Returns an implementation-defined error message if the write cannot be
    /// applied.
    fn apply(&mut self, write: Self::Write) -> Result<Self::ApplyResult, String>;

    /// Estimate the size of the delta in bytes.
    fn estimate_size(&self) -> usize;

    /// Freezes the current delta, creating an image with the delta
    /// applied.
    ///
    /// Consumes the delta and returns, in order: the frozen delta, a
    /// [`FrozenView`](Delta::FrozenView) for reading it, and the context
    /// (which was owned by the delta).
    /// Implementations should ensure this operation is efficient (e.g., via
    /// copy-on-write or reference counting) since it blocks writes. After this
    /// is complete, the [`Flusher::flush_delta`] happens on a background thread.
    fn freeze(self) -> (Self::Frozen, Self::FrozenView, Self::Context);

    /// Returns a [`DeltaView`](Delta::DeltaView) for reading the current,
    /// still-mutable delta.
    fn reader(&self) -> Self::DeltaView;
}
76
/// A value tagged with the range of epochs whose writes it contains.
///
/// `Debug` is derived so stamped values can be logged and shown in assertion
/// failures; the impl is only available when `T: Debug`.
#[derive(Debug, Clone)]
pub struct EpochStamped<T> {
    /// The wrapped value.
    pub val: T,
    /// The range of epochs contained in this value (exclusive end).
    /// Start is the first epoch in the flush, end is one past the last epoch.
    pub epoch_range: Range<u64>,
}

impl<T> EpochStamped<T> {
    /// Pairs `val` with the half-open `epoch_range` it covers.
    ///
    /// The range is stored exactly as given; no validation is performed, so an
    /// empty or inverted range is accepted as-is.
    pub(crate) fn new(val: T, epoch_range: Range<u64>) -> Self {
        Self { val, epoch_range }
    }
}
91
/// A flusher persists frozen deltas and ensures storage durability.
///
/// The two methods are separate steps: [`flush_delta`](Flusher::flush_delta)
/// writes one frozen delta, and [`flush_storage`](Flusher::flush_storage)
/// makes storage durable.
#[async_trait]
pub trait Flusher<D: Delta>: Send + Sync + 'static {
    /// Flush a frozen delta to storage.
    ///
    /// Consumes the frozen delta by value and returns a storage snapshot for
    /// readers.
    ///
    /// `epoch_range` is borrowed, not consumed.
    /// NOTE(review): presumably the half-open range of epochs contained in
    /// `frozen`, matching `EpochStamped::epoch_range` — confirm with the caller.
    ///
    /// # Errors
    ///
    /// Returns an implementation-defined error message if the flush fails.
    async fn flush_delta(
        &self,
        frozen: D::Frozen,
        epoch_range: &Range<u64>,
    ) -> Result<Arc<dyn StorageSnapshot>, String>;

    /// Ensure storage durability (e.g. call storage.flush()).
    ///
    /// # Errors
    ///
    /// Returns an implementation-defined error message if durability could not
    /// be ensured.
    async fn flush_storage(&self) -> Result<(), String>;
}