Skip to main content

reifydb_store_multi/tier/mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use std::{collections::HashMap, ops::Bound};
5
6use reifydb_core::{common::CommitVersion, interface::store::EntryKind};
7use reifydb_type::{Result, util::cowvec::CowVec};
8
/// A batch of key-value entries grouped by entry kind, used for atomic multi-table writes.
///
/// Each [`EntryKind`] maps to the list of `(key, value)` pairs to write under that kind.
/// A `None` value stands for a tombstone (deletion) rather than a stored value.
pub type TierBatch = HashMap<EntryKind, Vec<(CowVec<u8>, Option<CowVec<u8>>)>>;
11
/// A raw storage entry with version.
///
/// Key and value are opaque byte buffers. `value` is `None` for tombstones (deletions).
#[derive(Debug, Clone)]
pub struct RawEntry {
	/// Raw key bytes.
	pub key: CowVec<u8>,
	/// Commit version associated with this entry.
	pub version: CommitVersion,
	/// Raw value bytes, or `None` when the entry is a tombstone.
	pub value: Option<CowVec<u8>>,
}
21
/// A batch of range results with continuation info for pagination.
///
/// `has_more` tells the caller whether another batch can be requested after this one.
#[derive(Debug, Clone)]
pub struct RangeBatch {
	/// The entries in this batch.
	pub entries: Vec<RawEntry>,
	/// Whether there are more entries after this batch.
	pub has_more: bool,
}
30
31impl RangeBatch {
32	/// Creates an empty batch with no more results.
33	pub fn empty() -> Self {
34		Self {
35			entries: Vec::new(),
36			has_more: false,
37		}
38	}
39
40	/// Returns true if this batch contains no entries.
41	pub fn is_empty(&self) -> bool {
42		self.entries.is_empty()
43	}
44}
45
46/// Cursor state for streaming range queries.
47///
48/// Tracks position within a range scan, enabling efficient continuation
49/// across multiple batches without re-scanning from the beginning.
50#[derive(Debug, Clone)]
51pub struct RangeCursor {
52	/// Last key seen in the previous batch (for Bound::Excluded continuation)
53	pub last_key: Option<CowVec<u8>>,
54	/// Whether this stream is exhausted
55	pub exhausted: bool,
56}
57
58impl RangeCursor {
59	/// Create a new cursor at the start of a range.
60	pub fn new() -> Self {
61		Self {
62			last_key: None,
63			exhausted: false,
64		}
65	}
66
67	/// Check if the stream is exhausted.
68	pub fn is_exhausted(&self) -> bool {
69		self.exhausted
70	}
71}
72
73impl Default for RangeCursor {
74	fn default() -> Self {
75		Self::new()
76	}
77}
78
79/// The tier storage trait.
80///
81/// This is intentionally minimal - just raw bytes in/out.
82/// Version is a first-class parameter for all operations.
83/// All MVCC, CDC, and routing logic belongs in the store layer above.
84///
85/// Implementations must be thread-safe and cloneable.
86pub trait TierStorage: Send + Sync + Clone + 'static {
87	/// Get the value for a key at or before the given version.
88	fn get(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<Option<CowVec<u8>>>;
89
90	/// Check if a key exists at or before the given version.
91	fn contains(&self, table: EntryKind, key: &[u8], version: CommitVersion) -> Result<bool> {
92		Ok(self.get(table, key, version)?.is_some())
93	}
94
95	/// Write entries to multiple tables atomically at the given version.
96	///
97	/// All entries across all tables are written in a single transaction.
98	/// This ensures durability and atomicity for multi-table commits.
99	fn set(&self, version: CommitVersion, batches: TierBatch) -> Result<()>;
100
101	/// Fetch the next batch of entries in key order at or before version.
102	///
103	/// Uses the cursor to track position. On first call, cursor should be new.
104	/// On subsequent calls, pass the same cursor to continue from where left off.
105	/// Returns up to `batch_size` entries. The cursor is updated with the last
106	/// key seen, and `exhausted` is set to true when no more entries remain.
107	fn range_next(
108		&self,
109		table: EntryKind,
110		cursor: &mut RangeCursor,
111		start: Bound<&[u8]>,
112		end: Bound<&[u8]>,
113		version: CommitVersion,
114		batch_size: usize,
115	) -> Result<RangeBatch>;
116
117	/// Fetch the next batch of entries in reverse key order at or before version.
118	///
119	/// Uses the cursor to track position. On first call, cursor should be new.
120	/// On subsequent calls, pass the same cursor to continue from where left off.
121	/// Returns up to `batch_size` entries. The cursor is updated with the last
122	/// key seen, and `exhausted` is set to true when no more entries remain.
123	fn range_rev_next(
124		&self,
125		table: EntryKind,
126		cursor: &mut RangeCursor,
127		start: Bound<&[u8]>,
128		end: Bound<&[u8]>,
129		version: CommitVersion,
130		batch_size: usize,
131	) -> Result<RangeBatch>;
132
133	/// Ensure a table exists (creates if needed).
134	///
135	/// For memory backends this is typically a no-op.
136	/// For SQL backends this may create tables.
137	fn ensure_table(&self, table: EntryKind) -> Result<()>;
138
139	/// Delete all entries in a table.
140	fn clear_table(&self, table: EntryKind) -> Result<()>;
141
142	/// Physically drop specific versions of entries from storage.
143	///
144	/// Unlike `set()` with None values which inserts tombstones for MVCC,
145	/// this method actually removes entries from storage to reclaim memory.
146	/// Used by the drop worker to erase old versions after they're no longer needed.
147	///
148	/// Each entry in the batch is a (key, version) pair identifying the specific
149	/// version of the key to remove.
150	fn drop(&self, batches: HashMap<EntryKind, Vec<(CowVec<u8>, CommitVersion)>>) -> Result<()>;
151
152	/// Get all versions of a specific key (for internal cleanup operations).
153	///
154	/// Unlike `get()` which does MVCC resolution, this returns ALL stored versions
155	/// of the key with their values. Used by the drop worker to discover which
156	/// versions exist before deciding which to clean up.
157	///
158	/// Returns a vector of (version, value) pairs, sorted by version descending.
159	fn get_all_versions(&self, table: EntryKind, key: &[u8]) -> Result<Vec<(CommitVersion, Option<CowVec<u8>>)>>;
160}
161
/// Marker trait for storage tiers that support the tier storage interface.
///
/// It declares no items of its own; its only requirement is the
/// [`TierStorage`] supertrait.
pub trait TierBackend: TierStorage {}