reifydb_store_multi/tier/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4//! Common storage tier traits and types.
5//!
6//! This module defines the minimal interface that all storage tiers (hot, warm, cold)
7//! must implement. All MVCC, CDC, and routing logic belongs in the store layer above.
8
9use std::{collections::HashMap, ops::Bound};
10
11use reifydb_core::{common::CommitVersion, interface::store::EntryKind};
12use reifydb_type::{Result, util::cowvec::CowVec};
13
/// A batch of key-value entries grouped by entry kind, used for atomic multi-table writes.
///
/// Each map entry pairs an [`EntryKind`] with the `(key, value)` tuples to write under it.
/// A value of `None` asks for a tombstone to be written for that key (see
/// [`TierStorage::set`] and [`TierStorage::drop`]).
pub type TierBatch = HashMap<EntryKind, Vec<(CowVec<u8>, Option<CowVec<u8>>)>>;
16
/// A raw storage entry with version.
///
/// This is the unit returned by range scans (see [`RangeBatch`]): the key bytes, the
/// version the entry is stored under, and the stored bytes.
///
/// Value is None for tombstones (deletions).
#[derive(Debug, Clone)]
pub struct RawEntry {
    /// Raw key bytes.
    pub key: CowVec<u8>,
    /// Commit version this entry is stored under.
    pub version: CommitVersion,
    /// Raw value bytes, or `None` when the entry is a tombstone.
    pub value: Option<CowVec<u8>>,
}
26
27/// A batch of range results with continuation info for pagination.
28#[derive(Debug, Clone)]
29pub struct RangeBatch {
30 /// The entries in this batch.
31 pub entries: Vec<RawEntry>,
32 /// Whether there are more entries after this batch.
33 pub has_more: bool,
34}
35
36impl RangeBatch {
37 /// Creates an empty batch with no more results.
38 pub fn empty() -> Self {
39 Self {
40 entries: Vec::new(),
41 has_more: false,
42 }
43 }
44
45 /// Returns true if this batch contains no entries.
46 pub fn is_empty(&self) -> bool {
47 self.entries.is_empty()
48 }
49}
50
51/// Cursor state for streaming range queries.
52///
53/// Tracks position within a range scan, enabling efficient continuation
54/// across multiple batches without re-scanning from the beginning.
55#[derive(Debug, Clone)]
56pub struct RangeCursor {
57 /// Last key seen in the previous batch (for Bound::Excluded continuation)
58 pub last_key: Option<CowVec<u8>>,
59 /// Whether this stream is exhausted
60 pub exhausted: bool,
61}
62
63impl RangeCursor {
64 /// Create a new cursor at the start of a range.
65 pub fn new() -> Self {
66 Self {
67 last_key: None,
68 exhausted: false,
69 }
70 }
71
72 /// Check if the stream is exhausted.
73 pub fn is_exhausted(&self) -> bool {
74 self.exhausted
75 }
76}
77
78impl Default for RangeCursor {
79 fn default() -> Self {
80 Self::new()
81 }
82}
83
84/// The tier storage trait.
85///
86/// This is intentionally minimal - just raw bytes in/out.
87/// Version is a first-class parameter for all operations.
88/// All MVCC, CDC, and routing logic belongs in the store layer above.
89///
90/// Implementations must be thread-safe and cloneable.
91pub trait TierStorage: Send + Sync + Clone + 'static {
92 /// Get the value for a key at or before the given version.
93 fn get(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<Option<CowVec<u8>>>;
94
95 /// Check if a key exists at or before the given version.
96 fn contains(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<bool> {
97 Ok(self.get(table, key, version)?.is_some())
98 }
99
100 /// Write entries to multiple tables atomically at the given version.
101 ///
102 /// All entries across all tables are written in a single transaction.
103 /// This ensures durability and atomicity for multi-table commits.
104 fn set(&self, version: CommitVersion, batches: TierBatch) -> Result<()>;
105
106 /// Fetch the next batch of entries in key order at or before version.
107 ///
108 /// Uses the cursor to track position. On first call, cursor should be new.
109 /// On subsequent calls, pass the same cursor to continue from where left off.
110 /// Returns up to `batch_size` entries. The cursor is updated with the last
111 /// key seen, and `exhausted` is set to true when no more entries remain.
112 fn range_next(
113 &self,
114 table: EntryKind,
115 cursor: &mut RangeCursor,
116 start: Bound<&[u8]>,
117 end: Bound<&[u8]>,
118 version: CommitVersion,
119 batch_size: usize,
120 ) -> Result<RangeBatch>;
121
122 /// Fetch the next batch of entries in reverse key order at or before version.
123 ///
124 /// Uses the cursor to track position. On first call, cursor should be new.
125 /// On subsequent calls, pass the same cursor to continue from where left off.
126 /// Returns up to `batch_size` entries. The cursor is updated with the last
127 /// key seen, and `exhausted` is set to true when no more entries remain.
128 fn range_rev_next(
129 &self,
130 table: EntryKind,
131 cursor: &mut RangeCursor,
132 start: Bound<&[u8]>,
133 end: Bound<&[u8]>,
134 version: CommitVersion,
135 batch_size: usize,
136 ) -> Result<RangeBatch>;
137
138 /// Ensure a table exists (creates if needed).
139 ///
140 /// For memory backends this is typically a no-op.
141 /// For SQL backends this may create tables.
142 fn ensure_table(&self, table: EntryKind) -> Result<()>;
143
144 /// Delete all entries in a table.
145 fn clear_table(&self, table: EntryKind) -> Result<()>;
146
147 /// Physically drop specific versions of entries from storage.
148 ///
149 /// Unlike `set()` with None values which inserts tombstones for MVCC,
150 /// this method actually removes entries from storage to reclaim memory.
151 /// Used by the drop worker to erase old versions after they're no longer needed.
152 ///
153 /// Each entry in the batch is a (key, version) pair identifying the specific
154 /// version of the key to remove.
155 fn drop(&self, batches: HashMap<EntryKind, Vec<(CowVec<u8>, CommitVersion)>>) -> Result<()>;
156
157 /// Get all versions of a specific key (for internal cleanup operations).
158 ///
159 /// Unlike `get()` which does MVCC resolution, this returns ALL stored versions
160 /// of the key with their values. Used by the drop worker to discover which
161 /// versions exist before deciding which to clean up.
162 ///
163 /// Returns a vector of (version, value) pairs, sorted by version descending.
164 fn get_all_versions(&self, table: EntryKind, key: &[u8]) -> Result<Vec<(CommitVersion, Option<CowVec<u8>>)>>;
165}
166
/// Marker trait for storage tiers that support the tier storage interface.
///
/// It declares no methods of its own; its only requirement is the [`TierStorage`]
/// supertrait. No blanket implementation is visible in this module, so a type
/// presumably opts in with an explicit `impl TierBackend for ...` — confirm
/// against the tier implementations.
pub trait TierBackend: TierStorage {}