// reifydb_store_multi/tier/mod.rs

// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2025 ReifyDB

//! Common storage tier traits and types.
//!
//! This module defines the minimal interface that all storage tiers (hot, warm, cold)
//! must implement. All MVCC, CDC, and routing logic belongs in the store layer above.

9use std::{collections::HashMap, ops::Bound};
10
11use reifydb_core::{
12	common::CommitVersion,
13	interface::catalog::{flow::FlowNodeId, primitive::PrimitiveId},
14};
15use reifydb_type::{Result, util::cowvec::CowVec};
16
17/// Identifies a logical table/namespace in storage.
18///
19/// The store layer routes keys to the appropriate storage based on key type.
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
21pub enum EntryKind {
22	/// Multi-version storage for general data
23	Multi,
24	/// Per-source table for row data
25	Source(PrimitiveId),
26	/// Per-operator table for flow node state
27	Operator(FlowNodeId),
28}
29
30/// A raw storage entry with version.
31///
32/// Value is None for tombstones (deletions).
33#[derive(Debug, Clone)]
34pub struct RawEntry {
35	pub key: CowVec<u8>,
36	pub version: CommitVersion,
37	pub value: Option<CowVec<u8>>,
38}
39
40/// A batch of range results with continuation info for pagination.
41#[derive(Debug, Clone)]
42pub struct RangeBatch {
43	/// The entries in this batch.
44	pub entries: Vec<RawEntry>,
45	/// Whether there are more entries after this batch.
46	pub has_more: bool,
47}
48
49impl RangeBatch {
50	/// Creates an empty batch with no more results.
51	pub fn empty() -> Self {
52		Self {
53			entries: Vec::new(),
54			has_more: false,
55		}
56	}
57
58	/// Returns true if this batch contains no entries.
59	pub fn is_empty(&self) -> bool {
60		self.entries.is_empty()
61	}
62}
63
64/// Cursor state for streaming range queries.
65///
66/// Tracks position within a range scan, enabling efficient continuation
67/// across multiple batches without re-scanning from the beginning.
68#[derive(Debug, Clone)]
69pub struct RangeCursor {
70	/// Last key seen in the previous batch (for Bound::Excluded continuation)
71	pub last_key: Option<CowVec<u8>>,
72	/// Whether this stream is exhausted
73	pub exhausted: bool,
74}
75
76impl RangeCursor {
77	/// Create a new cursor at the start of a range.
78	pub fn new() -> Self {
79		Self {
80			last_key: None,
81			exhausted: false,
82		}
83	}
84
85	/// Check if the stream is exhausted.
86	pub fn is_exhausted(&self) -> bool {
87		self.exhausted
88	}
89}
90
91impl Default for RangeCursor {
92	fn default() -> Self {
93		Self::new()
94	}
95}
96
97/// The tier storage trait.
98///
99/// This is intentionally minimal - just raw bytes in/out.
100/// Version is a first-class parameter for all operations.
101/// All MVCC, CDC, and routing logic belongs in the store layer above.
102///
103/// Implementations must be thread-safe and cloneable.
104
105pub trait TierStorage: Send + Sync + Clone + 'static {
106	/// Get the value for a key at or before the given version.
107	fn get(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<Option<CowVec<u8>>>;
108
109	/// Check if a key exists at or before the given version.
110	fn contains(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<bool> {
111		Ok(self.get(table, key, version)?.is_some())
112	}
113
114	/// Write entries to multiple tables atomically at the given version.
115	///
116	/// All entries across all tables are written in a single transaction.
117	/// This ensures durability and atomicity for multi-table commits.
118	fn set(
119		&self,
120		version: CommitVersion,
121		batches: HashMap<EntryKind, Vec<(CowVec<u8>, Option<CowVec<u8>>)>>,
122	) -> Result<()>;
123
124	/// Fetch the next batch of entries in key order at or before version.
125	///
126	/// Uses the cursor to track position. On first call, cursor should be new.
127	/// On subsequent calls, pass the same cursor to continue from where left off.
128	/// Returns up to `batch_size` entries. The cursor is updated with the last
129	/// key seen, and `exhausted` is set to true when no more entries remain.
130	fn range_next(
131		&self,
132		table: EntryKind,
133		cursor: &mut RangeCursor,
134		start: Bound<&[u8]>,
135		end: Bound<&[u8]>,
136		version: CommitVersion,
137		batch_size: usize,
138	) -> Result<RangeBatch>;
139
140	/// Fetch the next batch of entries in reverse key order at or before version.
141	///
142	/// Uses the cursor to track position. On first call, cursor should be new.
143	/// On subsequent calls, pass the same cursor to continue from where left off.
144	/// Returns up to `batch_size` entries. The cursor is updated with the last
145	/// key seen, and `exhausted` is set to true when no more entries remain.
146	fn range_rev_next(
147		&self,
148		table: EntryKind,
149		cursor: &mut RangeCursor,
150		start: Bound<&[u8]>,
151		end: Bound<&[u8]>,
152		version: CommitVersion,
153		batch_size: usize,
154	) -> Result<RangeBatch>;
155
156	/// Ensure a table exists (creates if needed).
157	///
158	/// For memory backends this is typically a no-op.
159	/// For SQL backends this may create tables.
160	fn ensure_table(&self, table: EntryKind) -> Result<()>;
161
162	/// Delete all entries in a table.
163	fn clear_table(&self, table: EntryKind) -> Result<()>;
164
165	/// Physically drop specific versions of entries from storage.
166	///
167	/// Unlike `set()` with None values which inserts tombstones for MVCC,
168	/// this method actually removes entries from storage to reclaim memory.
169	/// Used by the drop worker to erase old versions after they're no longer needed.
170	///
171	/// Each entry in the batch is a (key, version) pair identifying the specific
172	/// version of the key to remove.
173	fn drop(&self, batches: HashMap<EntryKind, Vec<(CowVec<u8>, CommitVersion)>>) -> Result<()>;
174
175	/// Get all versions of a specific key (for internal cleanup operations).
176	///
177	/// Unlike `get()` which does MVCC resolution, this returns ALL stored versions
178	/// of the key with their values. Used by the drop worker to discover which
179	/// versions exist before deciding which to clean up.
180	///
181	/// Returns a vector of (version, value) pairs, sorted by version descending.
182	fn get_all_versions(&self, table: EntryKind, key: &[u8]) -> Result<Vec<(CommitVersion, Option<CowVec<u8>>)>>;
183}
184
185/// Marker trait for storage tiers that support the tier storage interface.
186pub trait TierBackend: TierStorage {}