llkv_column_map/types.rs
1//! Core type definitions for the columnar storage engine.
2//!
3//! This module defines the fundamental types used to identify and organize data:
4//! - [`LogicalFieldId`]: Namespaced 64-bit identifier for columns
5//! - [`Namespace`]: Categories of data (user columns, system metadata, MVCC tracking)
6//! - Type aliases for table IDs, field IDs, and row IDs
7
// FIXME: Since upgrading to `rustc 1.90.0 (1159e78c4 2025-09-14)`, this seems
// to be needed to work around parenthesis errors in `LogicalFieldId`, which
// crop up whether or not comments are added. This is possibly a bug in
// Clippy or `modular_bitfield`, or a small incompatibility issue.
12#![allow(unused_parens)]
13
14use modular_bitfield::prelude::*;
15
16// TODO: Clarify how many namespaces can actually be used.
17/// Category of data a column contains.
18///
19///
20/// The `Namespace` enum prevents ID collisions by segregating different types of
21/// columns into distinct namespaces. Each namespace can contain up to 2^16 tables,
22/// and each table can have up to 2^32 fields.
23///
24/// # Usage
25///
26/// Namespaces are embedded in [`LogicalFieldId`] to create globally unique column
27/// identifiers. User code typically works with `UserData` columns, while system
28/// components use the other namespaces for internal bookkeeping.
29#[derive(Specifier, Debug, PartialEq, Eq, Clone, Copy)]
30#[bits = 16]
31pub enum Namespace {
32 /// User-defined table columns.
33 ///
34 /// This is the default namespace for regular table columns. When a table is created
35 /// with columns like `name TEXT, age INT`, those columns use the `UserData` namespace.
36 UserData = 0,
37
38 /// Internal shadow column tracking row IDs.
39 ///
40 /// For each user column, the storage engine maintains a corresponding shadow column
41 /// that stores the row ID for each value. This enables efficient row-level operations
42 /// and join/filter optimizations.
43 RowIdShadow = 1,
44
45 /// MVCC metadata: transaction that created each row.
46 ///
47 /// Stores the transaction ID (`TxnId`) that inserted each row. Used for snapshot
48 /// isolation to determine row visibility.
49 TxnCreatedBy = 2,
50
51 /// MVCC metadata: transaction that deleted each row.
52 ///
53 /// Stores the transaction ID that deleted each row, or `TXN_ID_NONE` if the row
54 /// is not deleted. Used for snapshot isolation and garbage collection.
55 TxnDeletedBy = 3,
56
57 /// Reserved for future system use.
58 ///
59 /// The value `0xFFFF` is reserved as a sentinel to allow future expansion without
60 /// breaking compatibility.
61 Reserved = 0xFFFF,
62}
63
/// Unique identifier for a table.
///
/// Table IDs are 16-bit unsigned integers, so the ID space holds 65,536 values
/// (`0..=65_535`). With ID `0` reserved (see below), 65,535 IDs remain for user
/// tables. This type is embedded in [`LogicalFieldId`] to associate columns with
/// tables.
///
/// # Special Values
///
/// - Table ID `0` is reserved for the system catalog
/// - User tables receive IDs starting from `1`
pub type TableId = u16;
74
/// Unique identifier for a column within a table.
///
/// Field IDs are 32-bit unsigned integers, giving an ID space of 2^32
/// (~4.3 billion) values per table. This type is stored in
/// [`LogicalFieldId::field_id`] and must match that bitfield's width (32 bits).
///
/// # Special Values
///
/// - Field ID `0` ([`ROW_ID_FIELD_ID`]) is reserved for row ID columns
/// - Field ID `u32::MAX` is reserved for MVCC `created_by` columns
/// - Field ID `u32::MAX - 1` is reserved for MVCC `deleted_by` columns
/// - User columns receive IDs starting from `1`
pub type FieldId = u32;
87
88/// Reserved field ID for row ID columns.
89///
90/// This constant is used for the synthetic row ID column that exists in all tables.
91/// Row IDs are globally unique `u64` values that never change once assigned.
92pub const ROW_ID_FIELD_ID: FieldId = 0;
93
/// Unique identifier for a row within a table.
///
/// Row IDs are 64-bit unsigned integers assigned sequentially on insert. They are:
/// - Unique within a table
/// - Never reused (even after deletion)
/// - Monotonically increasing (within append batches)
/// - Used for joins, filters, and row-level operations
pub type RowId = u64;
102
103/// Globally unique identifier for a column in the storage engine.
104///
105/// A `LogicalFieldId` combines three components into a single 64-bit value:
106/// - **Namespace** (16 bits): Category of data (user, system, MVCC)
107/// - **Table ID** (16 bits): Which table the column belongs to
108/// - **Field ID** (32 bits): Which column within the table
109///
110/// This design prevents ID collisions across different tables and data categories while
111/// keeping identifiers compact and easy to pass around.
112///
113/// # Bit Layout
114///
115/// ```text
116/// |-------- 64 bits total --------|
117/// | namespace | table_id | field_id |
118/// | 16 bits | 16 bits | 32 bits |
119/// ```
120///
121/// # Construction
122///
123/// Use the constructor methods rather than directly manipulating bits:
124/// - [`LogicalFieldId::for_user`] - User-defined columns
125/// - [`LogicalFieldId::for_mvcc_created_by`] - MVCC created_by metadata
126/// - [`LogicalFieldId::for_mvcc_deleted_by`] - MVCC deleted_by metadata
127/// - [`LogicalFieldId::from_parts`] - Custom construction
128///
129/// # Thread Safety
130///
131/// `LogicalFieldId` is `Copy` and thread-safe.
132#[bitfield]
133#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Default)]
134#[repr(u64)]
135pub struct LogicalFieldId {
136 /// Column identifier within the table (32 bits).
137 ///
138 /// Supports up to ~4.3 billion columns per table. Field ID `0` is reserved for
139 /// row ID columns.
140 pub field_id: B32,
141
142 /// Table identifier (16 bits).
143 ///
144 /// Supports up to 65,535 tables. Table ID `0` is reserved for the system catalog.
145 pub table_id: B16,
146
147 /// Data category (16 bits).
148 ///
149 /// Determines whether this ID refers to user data, system metadata, or MVCC tracking.
150 pub namespace: Namespace,
151}
152
153impl LogicalFieldId {
154 /// Construct a `LogicalFieldId` from individual components.
155 ///
156 /// This is the most general constructor. Use the convenience methods
157 /// ([`for_user`](Self::for_user), [`for_mvcc_created_by`](Self::for_mvcc_created_by), etc.)
158 /// for common cases.
159 #[inline]
160 pub fn from_parts(namespace: Namespace, table_id: TableId, field_id: FieldId) -> Self {
161 LogicalFieldId::new()
162 .with_namespace(namespace)
163 .with_table_id(table_id)
164 .with_field_id(field_id)
165 }
166
167 /// Create an ID for a user-defined column.
168 ///
169 /// This is the most common constructor for regular table columns. It uses the
170 /// `UserData` namespace.
171 #[inline]
172 pub fn for_user(table_id: TableId, field_id: FieldId) -> Self {
173 Self::from_parts(Namespace::UserData, table_id, field_id)
174 }
175
176 /// Create an ID for a user column in table 0.
177 ///
178 /// This is a convenience method for tests and examples that use the default table ID.
179 #[inline]
180 pub fn for_user_table_0(field_id: FieldId) -> Self {
181 Self::for_user(0, field_id)
182 }
183
184 /// Create an ID for the MVCC `created_by` column of a table.
185 ///
186 /// Each table has a `created_by` column that tracks which transaction inserted
187 /// each row. The field ID is always `u32::MAX` as a sentinel value.
188 #[inline]
189 pub fn for_mvcc_created_by(table_id: TableId) -> Self {
190 Self::from_parts(Namespace::TxnCreatedBy, table_id, u32::MAX)
191 }
192
193 /// Create an ID for the MVCC `deleted_by` column of a table.
194 ///
195 /// Each table has a `deleted_by` column that tracks which transaction deleted
196 /// each row (or `TXN_ID_NONE` if not deleted). The field ID is always `u32::MAX - 1`
197 /// as a sentinel value.
198 #[inline]
199 pub fn for_mvcc_deleted_by(table_id: TableId) -> Self {
200 Self::from_parts(Namespace::TxnDeletedBy, table_id, u32::MAX - 1)
201 }
202}