Skip to main content

reifydb_store_multi/tier/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4//! Common storage tier traits and types.
5//!
6//! This module defines the minimal interface that all storage tiers (hot, warm, cold)
7//! must implement. All MVCC, CDC, and routing logic belongs in the store layer above.
8
9use std::{collections::HashMap, ops::Bound};
10
11use reifydb_core::{common::CommitVersion, interface::store::EntryKind};
12use reifydb_type::{Result, util::cowvec::CowVec};
13
14/// A batch of key-value entries grouped by entry kind, used for atomic multi-table writes.
15pub type TierBatch = HashMap<EntryKind, Vec<(CowVec<u8>, Option<CowVec<u8>>)>>;
16
17/// A raw storage entry with version.
18///
19/// Value is None for tombstones (deletions).
20#[derive(Debug, Clone)]
21pub struct RawEntry {
22	pub key: CowVec<u8>,
23	pub version: CommitVersion,
24	pub value: Option<CowVec<u8>>,
25}
26
27/// A batch of range results with continuation info for pagination.
28#[derive(Debug, Clone)]
29pub struct RangeBatch {
30	/// The entries in this batch.
31	pub entries: Vec<RawEntry>,
32	/// Whether there are more entries after this batch.
33	pub has_more: bool,
34}
35
36impl RangeBatch {
37	/// Creates an empty batch with no more results.
38	pub fn empty() -> Self {
39		Self {
40			entries: Vec::new(),
41			has_more: false,
42		}
43	}
44
45	/// Returns true if this batch contains no entries.
46	pub fn is_empty(&self) -> bool {
47		self.entries.is_empty()
48	}
49}
50
51/// Cursor state for streaming range queries.
52///
53/// Tracks position within a range scan, enabling efficient continuation
54/// across multiple batches without re-scanning from the beginning.
55#[derive(Debug, Clone)]
56pub struct RangeCursor {
57	/// Last key seen in the previous batch (for Bound::Excluded continuation)
58	pub last_key: Option<CowVec<u8>>,
59	/// Whether this stream is exhausted
60	pub exhausted: bool,
61}
62
63impl RangeCursor {
64	/// Create a new cursor at the start of a range.
65	pub fn new() -> Self {
66		Self {
67			last_key: None,
68			exhausted: false,
69		}
70	}
71
72	/// Check if the stream is exhausted.
73	pub fn is_exhausted(&self) -> bool {
74		self.exhausted
75	}
76}
77
78impl Default for RangeCursor {
79	fn default() -> Self {
80		Self::new()
81	}
82}
83
84/// The tier storage trait.
85///
86/// This is intentionally minimal - just raw bytes in/out.
87/// Version is a first-class parameter for all operations.
88/// All MVCC, CDC, and routing logic belongs in the store layer above.
89///
90/// Implementations must be thread-safe and cloneable.
91pub trait TierStorage: Send + Sync + Clone + 'static {
92	/// Get the value for a key at or before the given version.
93	fn get(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<Option<CowVec<u8>>>;
94
95	/// Check if a key exists at or before the given version.
96	fn contains(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<bool> {
97		Ok(self.get(table, key, version)?.is_some())
98	}
99
100	/// Write entries to multiple tables atomically at the given version.
101	///
102	/// All entries across all tables are written in a single transaction.
103	/// This ensures durability and atomicity for multi-table commits.
104	fn set(&self, version: CommitVersion, batches: TierBatch) -> Result<()>;
105
106	/// Fetch the next batch of entries in key order at or before version.
107	///
108	/// Uses the cursor to track position. On first call, cursor should be new.
109	/// On subsequent calls, pass the same cursor to continue from where left off.
110	/// Returns up to `batch_size` entries. The cursor is updated with the last
111	/// key seen, and `exhausted` is set to true when no more entries remain.
112	fn range_next(
113		&self,
114		table: EntryKind,
115		cursor: &mut RangeCursor,
116		start: Bound<&[u8]>,
117		end: Bound<&[u8]>,
118		version: CommitVersion,
119		batch_size: usize,
120	) -> Result<RangeBatch>;
121
122	/// Fetch the next batch of entries in reverse key order at or before version.
123	///
124	/// Uses the cursor to track position. On first call, cursor should be new.
125	/// On subsequent calls, pass the same cursor to continue from where left off.
126	/// Returns up to `batch_size` entries. The cursor is updated with the last
127	/// key seen, and `exhausted` is set to true when no more entries remain.
128	fn range_rev_next(
129		&self,
130		table: EntryKind,
131		cursor: &mut RangeCursor,
132		start: Bound<&[u8]>,
133		end: Bound<&[u8]>,
134		version: CommitVersion,
135		batch_size: usize,
136	) -> Result<RangeBatch>;
137
138	/// Ensure a table exists (creates if needed).
139	///
140	/// For memory backends this is typically a no-op.
141	/// For SQL backends this may create tables.
142	fn ensure_table(&self, table: EntryKind) -> Result<()>;
143
144	/// Delete all entries in a table.
145	fn clear_table(&self, table: EntryKind) -> Result<()>;
146
147	/// Physically drop specific versions of entries from storage.
148	///
149	/// Unlike `set()` with None values which inserts tombstones for MVCC,
150	/// this method actually removes entries from storage to reclaim memory.
151	/// Used by the drop worker to erase old versions after they're no longer needed.
152	///
153	/// Each entry in the batch is a (key, version) pair identifying the specific
154	/// version of the key to remove.
155	fn drop(&self, batches: HashMap<EntryKind, Vec<(CowVec<u8>, CommitVersion)>>) -> Result<()>;
156
157	/// Get all versions of a specific key (for internal cleanup operations).
158	///
159	/// Unlike `get()` which does MVCC resolution, this returns ALL stored versions
160	/// of the key with their values. Used by the drop worker to discover which
161	/// versions exist before deciding which to clean up.
162	///
163	/// Returns a vector of (version, value) pairs, sorted by version descending.
164	fn get_all_versions(&self, table: EntryKind, key: &[u8]) -> Result<Vec<(CommitVersion, Option<CowVec<u8>>)>>;
165}
166
167/// Marker trait for storage tiers that support the tier storage interface.
168pub trait TierBackend: TierStorage {}