reifydb_store_multi/tier/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4//! Common storage tier traits and types.
5//!
6//! This module defines the minimal interface that all storage tiers (hot, warm, cold)
7//! must implement. All MVCC, CDC, and routing logic belongs in the store layer above.
8
9use std::{collections::HashMap, ops::Bound};
10
11use reifydb_core::{
12 common::CommitVersion,
13 interface::catalog::{flow::FlowNodeId, primitive::PrimitiveId},
14};
15use reifydb_type::{Result, util::cowvec::CowVec};
16
17/// Identifies a logical table/namespace in storage.
18///
19/// The store layer routes keys to the appropriate storage based on key type.
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
21pub enum EntryKind {
22 /// Multi-version storage for general data
23 Multi,
24 /// Per-source table for row data
25 Source(PrimitiveId),
26 /// Per-operator table for flow node state
27 Operator(FlowNodeId),
28}
29
30/// A raw storage entry with version.
31///
32/// Value is None for tombstones (deletions).
33#[derive(Debug, Clone)]
34pub struct RawEntry {
35 pub key: CowVec<u8>,
36 pub version: CommitVersion,
37 pub value: Option<CowVec<u8>>,
38}
39
40/// A batch of range results with continuation info for pagination.
41#[derive(Debug, Clone)]
42pub struct RangeBatch {
43 /// The entries in this batch.
44 pub entries: Vec<RawEntry>,
45 /// Whether there are more entries after this batch.
46 pub has_more: bool,
47}
48
49impl RangeBatch {
50 /// Creates an empty batch with no more results.
51 pub fn empty() -> Self {
52 Self {
53 entries: Vec::new(),
54 has_more: false,
55 }
56 }
57
58 /// Returns true if this batch contains no entries.
59 pub fn is_empty(&self) -> bool {
60 self.entries.is_empty()
61 }
62}
63
64/// Cursor state for streaming range queries.
65///
66/// Tracks position within a range scan, enabling efficient continuation
67/// across multiple batches without re-scanning from the beginning.
68#[derive(Debug, Clone)]
69pub struct RangeCursor {
70 /// Last key seen in the previous batch (for Bound::Excluded continuation)
71 pub last_key: Option<CowVec<u8>>,
72 /// Whether this stream is exhausted
73 pub exhausted: bool,
74}
75
76impl RangeCursor {
77 /// Create a new cursor at the start of a range.
78 pub fn new() -> Self {
79 Self {
80 last_key: None,
81 exhausted: false,
82 }
83 }
84
85 /// Check if the stream is exhausted.
86 pub fn is_exhausted(&self) -> bool {
87 self.exhausted
88 }
89}
90
91impl Default for RangeCursor {
92 fn default() -> Self {
93 Self::new()
94 }
95}
96
97/// The tier storage trait.
98///
99/// This is intentionally minimal - just raw bytes in/out.
100/// Version is a first-class parameter for all operations.
101/// All MVCC, CDC, and routing logic belongs in the store layer above.
102///
103/// Implementations must be thread-safe and cloneable.
104
105pub trait TierStorage: Send + Sync + Clone + 'static {
106 /// Get the value for a key at or before the given version.
107 fn get(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<Option<CowVec<u8>>>;
108
109 /// Check if a key exists at or before the given version.
110 fn contains(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<bool> {
111 Ok(self.get(table, key, version)?.is_some())
112 }
113
114 /// Write entries to multiple tables atomically at the given version.
115 ///
116 /// All entries across all tables are written in a single transaction.
117 /// This ensures durability and atomicity for multi-table commits.
118 fn set(
119 &self,
120 version: CommitVersion,
121 batches: HashMap<EntryKind, Vec<(CowVec<u8>, Option<CowVec<u8>>)>>,
122 ) -> Result<()>;
123
124 /// Fetch the next batch of entries in key order at or before version.
125 ///
126 /// Uses the cursor to track position. On first call, cursor should be new.
127 /// On subsequent calls, pass the same cursor to continue from where left off.
128 /// Returns up to `batch_size` entries. The cursor is updated with the last
129 /// key seen, and `exhausted` is set to true when no more entries remain.
130 fn range_next(
131 &self,
132 table: EntryKind,
133 cursor: &mut RangeCursor,
134 start: Bound<&[u8]>,
135 end: Bound<&[u8]>,
136 version: CommitVersion,
137 batch_size: usize,
138 ) -> Result<RangeBatch>;
139
140 /// Fetch the next batch of entries in reverse key order at or before version.
141 ///
142 /// Uses the cursor to track position. On first call, cursor should be new.
143 /// On subsequent calls, pass the same cursor to continue from where left off.
144 /// Returns up to `batch_size` entries. The cursor is updated with the last
145 /// key seen, and `exhausted` is set to true when no more entries remain.
146 fn range_rev_next(
147 &self,
148 table: EntryKind,
149 cursor: &mut RangeCursor,
150 start: Bound<&[u8]>,
151 end: Bound<&[u8]>,
152 version: CommitVersion,
153 batch_size: usize,
154 ) -> Result<RangeBatch>;
155
156 /// Ensure a table exists (creates if needed).
157 ///
158 /// For memory backends this is typically a no-op.
159 /// For SQL backends this may create tables.
160 fn ensure_table(&self, table: EntryKind) -> Result<()>;
161
162 /// Delete all entries in a table.
163 fn clear_table(&self, table: EntryKind) -> Result<()>;
164
165 /// Physically drop specific versions of entries from storage.
166 ///
167 /// Unlike `set()` with None values which inserts tombstones for MVCC,
168 /// this method actually removes entries from storage to reclaim memory.
169 /// Used by the drop worker to erase old versions after they're no longer needed.
170 ///
171 /// Each entry in the batch is a (key, version) pair identifying the specific
172 /// version of the key to remove.
173 fn drop(&self, batches: HashMap<EntryKind, Vec<(CowVec<u8>, CommitVersion)>>) -> Result<()>;
174
175 /// Get all versions of a specific key (for internal cleanup operations).
176 ///
177 /// Unlike `get()` which does MVCC resolution, this returns ALL stored versions
178 /// of the key with their values. Used by the drop worker to discover which
179 /// versions exist before deciding which to clean up.
180 ///
181 /// Returns a vector of (version, value) pairs, sorted by version descending.
182 fn get_all_versions(&self, table: EntryKind, key: &[u8]) -> Result<Vec<(CommitVersion, Option<CowVec<u8>>)>>;
183}
184
185/// Marker trait for storage tiers that support the tier storage interface.
186pub trait TierBackend: TierStorage {}