llkv_column_map/
types.rs

1//! Core type definitions for the columnar storage engine.
2//!
3//! This module defines the fundamental types used to identify and organize data:
4//! - [`LogicalFieldId`]: Namespaced 64-bit identifier for columns
5//! - [`Namespace`]: Categories of data (user columns, system metadata, MVCC tracking)
6//! - Type aliases for table IDs, field IDs, and row IDs
7
// FIXME: Since upgrading to `rustc 1.90.0 (1159e78c4 2025-09-14)`, this seems
// to be needed to work around parenthesis errors in `LogicalFieldId`, which
// crop up regardless of whether comments are added. This is possibly a bug
// in Clippy or `modular_bitfield`, or a small incompatibility issue.
12#![allow(unused_parens)]
13
14use modular_bitfield::prelude::*;
15
16// TODO: Clarify how many namespaces can actually be used.
17/// Category of data a column contains.
18///
19///
20/// The `Namespace` enum prevents ID collisions by segregating different types of
21/// columns into distinct namespaces. Each namespace can contain up to 2^16 tables,
22/// and each table can have up to 2^32 fields.
23///
24/// # Usage
25///
26/// Namespaces are embedded in [`LogicalFieldId`] to create globally unique column
27/// identifiers. User code typically works with `UserData` columns, while system
28/// components use the other namespaces for internal bookkeeping.
29#[derive(Specifier, Debug, PartialEq, Eq, Clone, Copy)]
30#[bits = 16]
31pub enum Namespace {
32    /// User-defined table columns.
33    ///
34    /// This is the default namespace for regular table columns. When a table is created
35    /// with columns like `name TEXT, age INT`, those columns use the `UserData` namespace.
36    UserData = 0,
37
38    /// Internal shadow column tracking row IDs.
39    ///
40    /// For each user column, the storage engine maintains a corresponding shadow column
41    /// that stores the row ID for each value. This enables efficient row-level operations
42    /// and join/filter optimizations.
43    RowIdShadow = 1,
44
45    /// MVCC metadata: transaction that created each row.
46    ///
47    /// Stores the transaction ID (`TxnId`) that inserted each row. Used for snapshot
48    /// isolation to determine row visibility.
49    TxnCreatedBy = 2,
50
51    /// MVCC metadata: transaction that deleted each row.
52    ///
53    /// Stores the transaction ID that deleted each row, or `TXN_ID_NONE` if the row
54    /// is not deleted. Used for snapshot isolation and garbage collection.
55    TxnDeletedBy = 3,
56
57    /// Reserved for future system use.
58    ///
59    /// The value `0xFFFF` is reserved as a sentinel to allow future expansion without
60    /// breaking compatibility.
61    Reserved = 0xFFFF,
62}
63
/// Identifier of a table.
///
/// A `TableId` is a 16-bit unsigned integer, so 65,536 distinct IDs exist; with ID `0`
/// set aside (see below) that leaves up to 65,535 IDs for user tables. It forms the
/// table component of a [`LogicalFieldId`], tying each column to its table.
///
/// # Special Values
///
/// - `0` is reserved for the system catalog
/// - User tables are numbered from `1` upward
pub type TableId = u16;
74
/// Identifier of a column within a single table.
///
/// A `FieldId` is a 32-bit unsigned integer (roughly 4.3 billion possible columns per
/// table). Its width must stay equal to that of the [`LogicalFieldId::field_id`]
/// bitfield, which is where the value is stored.
///
/// # Special Values
///
/// - `0` (`ROW_ID_FIELD_ID`) is reserved for row ID columns
/// - `u32::MAX` is reserved for MVCC `created_by` columns
/// - `u32::MAX - 1` is reserved for MVCC `deleted_by` columns
/// - User columns are numbered from `1` upward
pub type FieldId = u32;

/// Field ID set aside for row ID columns.
///
/// Identifies the synthetic row ID column that every table has. Row IDs are globally
/// unique `u64` values that stay fixed once assigned.
pub const ROW_ID_FIELD_ID: FieldId = 0;
93
/// Identifier of a row within a table.
///
/// A `RowId` is a 64-bit unsigned integer handed out sequentially at insert time.
/// Row IDs:
/// - are globally unique within a table
/// - are never reused, even after the row is deleted
/// - increase monotonically within an append batch
/// - serve joins, filters, and row-level operations
pub type RowId = u64;
102
/// Globally unique identifier for a column in the storage engine.
///
/// A `LogicalFieldId` packs three components into a single 64-bit value:
/// - **Namespace** (16 bits): category of data (user, system, MVCC)
/// - **Table ID** (16 bits): the table the column belongs to
/// - **Field ID** (32 bits): the column within that table
///
/// Combining all three prevents ID collisions across tables and data categories
/// while keeping the identifier compact and cheap to pass around.
///
/// # Bit Layout
///
/// ```text
/// |-------- 64 bits total --------|
/// | namespace | table_id | field_id |
/// |  16 bits  | 16 bits  | 32 bits  |
/// ```
///
/// NOTE(review): the struct declares its fields in the opposite order to the
/// diagram. This presumably relies on `modular_bitfield` placing the first declared
/// field in the least-significant bits, with the diagram drawn most-significant
/// bits first — confirm against the crate documentation. Reordering or resizing the
/// fields changes the packed representation.
///
/// # Construction
///
/// Use the constructor methods rather than manipulating bits directly:
/// - [`LogicalFieldId::for_user`] - user-defined columns
/// - [`LogicalFieldId::for_user_table_0`] - user columns in table `0` (tests, examples)
/// - [`LogicalFieldId::for_mvcc_created_by`] - MVCC `created_by` metadata
/// - [`LogicalFieldId::for_mvcc_deleted_by`] - MVCC `deleted_by` metadata
/// - [`LogicalFieldId::from_parts`] - custom construction
///
/// # Thread Safety
///
/// `LogicalFieldId` is `Copy` and thread-safe.
#[bitfield]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Default)]
#[repr(u64)]
pub struct LogicalFieldId {
    /// Column identifier within the table (32 bits).
    ///
    /// Supports up to ~4.3 billion columns per table. Field ID `0` is reserved for
    /// row ID columns.
    pub field_id: B32,

    /// Table identifier (16 bits).
    ///
    /// Supports up to 65,535 tables. Table ID `0` is reserved for the system catalog.
    pub table_id: B16,

    /// Data category (16 bits).
    ///
    /// Determines whether this ID refers to user data, system metadata, or MVCC
    /// tracking. See [`Namespace`] for the available categories.
    pub namespace: Namespace,
}
152
153impl LogicalFieldId {
154    /// Construct a `LogicalFieldId` from individual components.
155    ///
156    /// This is the most general constructor. Use the convenience methods
157    /// ([`for_user`](Self::for_user), [`for_mvcc_created_by`](Self::for_mvcc_created_by), etc.)
158    /// for common cases.
159    #[inline]
160    pub fn from_parts(namespace: Namespace, table_id: TableId, field_id: FieldId) -> Self {
161        LogicalFieldId::new()
162            .with_namespace(namespace)
163            .with_table_id(table_id)
164            .with_field_id(field_id)
165    }
166
167    /// Create an ID for a user-defined column.
168    ///
169    /// This is the most common constructor for regular table columns. It uses the
170    /// `UserData` namespace.
171    #[inline]
172    pub fn for_user(table_id: TableId, field_id: FieldId) -> Self {
173        Self::from_parts(Namespace::UserData, table_id, field_id)
174    }
175
176    /// Create an ID for a user column in table 0.
177    ///
178    /// This is a convenience method for tests and examples that use the default table ID.
179    #[inline]
180    pub fn for_user_table_0(field_id: FieldId) -> Self {
181        Self::for_user(0, field_id)
182    }
183
184    /// Create an ID for the MVCC `created_by` column of a table.
185    ///
186    /// Each table has a `created_by` column that tracks which transaction inserted
187    /// each row. The field ID is always `u32::MAX` as a sentinel value.
188    #[inline]
189    pub fn for_mvcc_created_by(table_id: TableId) -> Self {
190        Self::from_parts(Namespace::TxnCreatedBy, table_id, u32::MAX)
191    }
192
193    /// Create an ID for the MVCC `deleted_by` column of a table.
194    ///
195    /// Each table has a `deleted_by` column that tracks which transaction deleted
196    /// each row (or `TXN_ID_NONE` if not deleted). The field ID is always `u32::MAX - 1`
197    /// as a sentinel value.
198    #[inline]
199    pub fn for_mvcc_deleted_by(table_id: TableId) -> Self {
200        Self::from_parts(Namespace::TxnDeletedBy, table_id, u32::MAX - 1)
201    }
202}