Skip to main content

reddb_server/
physical.rs

1//! Physical storage design primitives for RedDB's deterministic on-disk layout.
2
3use std::collections::BTreeMap;
4use std::fs;
5use std::io;
6use std::path::{Path, PathBuf};
7use std::time::{SystemTime, UNIX_EPOCH};
8
9use crate::api::{
10    CatalogSnapshot, CollectionStats, RedDBOptions, SchemaManifest, StorageMode,
11    REDDB_FORMAT_VERSION,
12};
13use crate::index::IndexKind;
14use crate::json::{from_slice, parse_json, to_vec};
15use crate::serde_json::{Map, Value as JsonValue};
16
/// Default for `GridLayout::block_size` (524 288, i.e. 512 * 1024).
pub const DEFAULT_GRID_BLOCK_SIZE: usize = 512 << 10;
/// Default for `GridLayout::page_size` (4096).
pub const DEFAULT_PAGE_SIZE: usize = 4 * 1024;
/// Default number of superblock copies, used by both `GridLayout` and
/// `SuperblockHeader` defaults.
pub const DEFAULT_SUPERBLOCK_COPIES: u8 = 4;
/// Protocol version label for physical metadata.
// NOTE(review): presumably the value written to `PhysicalMetadataFile::protocol_version` — confirm in the codec.
pub const PHYSICAL_METADATA_PROTOCOL_VERSION: &str = "reddb-physical-v1";
/// File extension used for the binary form of the physical metadata.
pub const PHYSICAL_METADATA_BINARY_EXTENSION: &str = "meta.rdbx";
/// Default manifest event history size.
// NOTE(review): looks like a cap on retained `ManifestEvent`s — confirm at the use site.
pub const DEFAULT_MANIFEST_EVENT_HISTORY: usize = 256;
/// Default metadata journal retention.
// NOTE(review): presumably a count of retained journal entries — confirm at the use site.
pub const DEFAULT_METADATA_JOURNAL_RETENTION: usize = 32;
24
/// The kinds of physical-metadata source this module distinguishes.
///
/// Each variant has a fixed lowercase label, available through
/// [`PhysicalMetadataSource::as_str`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PhysicalMetadataSource {
    /// Labelled `"binary"`.
    Binary,
    /// Labelled `"binary_journal"`.
    BinaryJournal,
    /// Labelled `"json"`.
    Json,
}

impl PhysicalMetadataSource {
    /// Returns the static label associated with this source kind.
    pub fn as_str(self) -> &'static str {
        match self {
            PhysicalMetadataSource::Binary => "binary",
            PhysicalMetadataSource::BinaryJournal => "binary_journal",
            PhysicalMetadataSource::Json => "json",
        }
    }
}
/// A reference to a block: its index paired with a checksum.
///
/// The default value has both fields set to zero.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct BlockReference {
    /// Index of the referenced block.
    pub index: u64,
    /// Checksum recorded for the referenced block.
    // NOTE(review): the checksum algorithm is not visible in this file section.
    pub checksum: u128,
}
46
47#[derive(Debug, Clone, Default)]
48pub struct ManifestPointers {
49    pub oldest: BlockReference,
50    pub newest: BlockReference,
51}
52
53#[derive(Debug, Clone)]
54pub struct SuperblockHeader {
55    pub format_version: u32,
56    pub sequence: u64,
57    pub copies: u8,
58    pub manifest: ManifestPointers,
59    pub free_set: BlockReference,
60    pub collection_roots: BTreeMap<String, u64>,
61}
62
63impl Default for SuperblockHeader {
64    fn default() -> Self {
65        Self {
66            format_version: crate::api::REDDB_FORMAT_VERSION,
67            sequence: 0,
68            copies: DEFAULT_SUPERBLOCK_COPIES,
69            manifest: ManifestPointers::default(),
70            free_set: BlockReference::default(),
71            collection_roots: BTreeMap::new(),
72        }
73    }
74}
75
/// The kind of change a [`ManifestEvent`] records.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ManifestEventKind {
    /// An insertion.
    Insert,
    /// An update.
    Update,
    /// A removal.
    Remove,
    /// A checkpoint.
    Checkpoint,
}
83
84#[derive(Debug, Clone)]
85pub struct ManifestEvent {
86    pub collection: String,
87    pub object_key: String,
88    pub kind: ManifestEventKind,
89    pub block: BlockReference,
90    pub snapshot_min: u64,
91    pub snapshot_max: Option<u64>,
92}
93
/// Selects how compaction is driven.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CompactionPolicy {
    /// Incremental compaction; the value `PhysicalLayout::from_options` picks.
    Incremental,
    /// Manual compaction.
    Manual,
}
99
/// Write-ahead-log tuning knobs.
///
/// `PartialEq`/`Eq` are derived so two policies can be compared directly,
/// for example in tests or when checking whether a configuration changed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WalPolicy {
    /// Page count used for automatic checkpointing; defaults to `1000`.
    pub auto_checkpoint_pages: u32,
    /// Whether commits are fsynced; defaults to `true`.
    pub fsync_on_commit: bool,
    /// Ring buffer size in bytes; defaults to `64 * 1024 * 1024`.
    pub ring_buffer_bytes: u64,
}

impl Default for WalPolicy {
    /// Returns the default policy: 1000 auto-checkpoint pages, fsync on
    /// commit enabled, and a 64 MiB ring buffer.
    fn default() -> Self {
        Self {
            auto_checkpoint_pages: 1000,
            fsync_on_commit: true,
            ring_buffer_bytes: 64 * 1024 * 1024,
        }
    }
}
116
117#[derive(Debug, Clone)]
118pub struct GridLayout {
119    pub block_size: usize,
120    pub page_size: usize,
121    pub superblock_copies: u8,
122}
123
124impl Default for GridLayout {
125    fn default() -> Self {
126        Self {
127            block_size: DEFAULT_GRID_BLOCK_SIZE,
128            page_size: DEFAULT_PAGE_SIZE,
129            superblock_copies: DEFAULT_SUPERBLOCK_COPIES,
130        }
131    }
132}
133
134#[derive(Debug, Clone)]
135pub struct PhysicalLayout {
136    pub mode: StorageMode,
137    pub grid: GridLayout,
138    pub wal: WalPolicy,
139    pub compaction: CompactionPolicy,
140}
141
142impl PhysicalLayout {
143    pub fn from_options(options: &RedDBOptions) -> Self {
144        Self {
145            mode: options.mode,
146            grid: GridLayout::default(),
147            wal: WalPolicy {
148                auto_checkpoint_pages: options.auto_checkpoint_pages,
149                ..WalPolicy::default()
150            },
151            compaction: CompactionPolicy::Incremental,
152        }
153    }
154
155    pub fn is_persistent(&self) -> bool {
156        self.mode == StorageMode::Persistent
157    }
158}
159
/// Summary record describing one snapshot.
///
/// Every field defaults to zero.
#[derive(Clone, Debug, Default)]
pub struct SnapshotDescriptor {
    /// Identifier of the snapshot.
    pub snapshot_id: u64,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Superblock sequence number recorded with the snapshot.
    pub superblock_sequence: u64,
    /// Number of collections recorded for the snapshot.
    pub collection_count: usize,
    /// Total entity count recorded for the snapshot.
    pub total_entities: usize,
}
168
/// How a collection contract came into being.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ContractOrigin {
    /// Labelled `"explicit"`.
    Explicit,
    /// Labelled `"implicit"`.
    Implicit,
    /// Labelled `"migrated"`.
    Migrated,
}

impl ContractOrigin {
    /// Returns the static lowercase label for this origin.
    pub fn as_str(self) -> &'static str {
        match self {
            ContractOrigin::Explicit => "explicit",
            ContractOrigin::Implicit => "implicit",
            ContractOrigin::Migrated => "migrated",
        }
    }
}
185
186#[derive(Debug, Clone)]
187pub struct DeclaredColumnContract {
188    pub name: String,
189    pub data_type: String,
190    pub sql_type: Option<crate::storage::schema::SqlTypeName>,
191    pub not_null: bool,
192    pub default: Option<String>,
193    pub compress: Option<u8>,
194    pub unique: bool,
195    pub primary_key: bool,
196    pub enum_variants: Vec<String>,
197    pub array_element: Option<String>,
198    pub decimal_precision: Option<u8>,
199}
200
201#[derive(Debug, Clone)]
202pub struct CollectionContract {
203    pub name: String,
204    pub declared_model: crate::catalog::CollectionModel,
205    pub schema_mode: crate::catalog::SchemaMode,
206    pub origin: ContractOrigin,
207    pub version: u32,
208    pub created_at_unix_ms: u128,
209    pub updated_at_unix_ms: u128,
210    pub default_ttl_ms: Option<u64>,
211    pub vector_dimension: Option<usize>,
212    pub vector_metric: Option<crate::storage::engine::distance::DistanceMetric>,
213    pub context_index_fields: Vec<String>,
214    pub declared_columns: Vec<DeclaredColumnContract>,
215    pub table_def: Option<crate::storage::schema::TableDef>,
216    /// Enabled by `CREATE TABLE ... WITH timestamps = true`. When true,
217    /// the runtime auto-populates two user-visible columns
218    /// `created_at` + `updated_at` (BIGINT unix-ms) sourced from the
219    /// `UnifiedEntity::created_at/updated_at` fields. `created_at` is
220    /// immutable after insert; `updated_at` is bumped on every mutation.
221    pub timestamps_enabled: bool,
222    /// Enabled by `CREATE TABLE ... WITH context_index = true` (or by
223    /// naming specific `context_index_fields`). When true, every INSERT
224    /// tokenises the row's text fields and populates the global context
225    /// index that backs `SEARCH CONTEXT` / `SEARCH SIMILAR TEXT` / `ASK`
226    /// (RAG). When false (default), inserts skip the tokenisation +
227    /// 3-way RwLock write storm entirely — ~800 ns faster per insert,
228    /// and SEARCH returns empty for this collection.
229    ///
230    /// Opt-in by design: pure OLTP tables (accounts, orders, events)
231    /// pay zero indexing tax; search-oriented tables (articles, docs)
232    /// flip the switch at CREATE time.
233    pub context_index_enabled: bool,
234    /// Enabled by `CREATE TABLE ... APPEND ONLY` or `WITH
235    /// (append_only = true)`. When true, the runtime rejects
236    /// `UPDATE` and `DELETE` against this collection at parse time
237    /// with a clear error — the operator's immutability intent
238    /// becomes a first-class catalog fact rather than an RLS-shaped
239    /// approximation. Default `false` so legacy DDL keeps its
240    /// mutable semantics.
241    pub append_only: bool,
242    /// Declarative subscriptions created by `WITH EVENTS`. This is
243    /// metadata only in #291; event emission is wired by the outbox slice.
244    pub subscriptions: Vec<crate::catalog::SubscriptionDescriptor>,
245}
246
/// Lifecycle states of an artifact.
///
/// Transitions between the states:
/// ```text
///   Declared ──► Building ──► Ready ──► Stale ──► RequiresRebuild
///       │            │          │                       │
///       │            ▼          ▼                       │
///       │         Failed    Disabled                    │
///       │            │                                  │
///       └────────────┴──────────────────────────────────┘
///                    (rebuild restarts from Building)
/// ```
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ArtifactState {
    /// The index has been declared but nothing was ever materialized.
    Declared,
    /// A build or rebuild of the artifact is in progress.
    Building,
    /// The artifact is materialized and can be queried.
    Ready,
    /// The operator explicitly disabled the artifact.
    Disabled,
    /// The underlying data changed, so the artifact is out of date.
    Stale,
    /// A build or warmup failed; manual intervention may be required.
    Failed,
    /// The artifact has to be rebuilt before it can serve reads.
    RequiresRebuild,
}

impl ArtifactState {
    /// Interprets the legacy build-state string kept in physical metadata.
    ///
    /// A disabled artifact (`enabled == false`) is always
    /// [`ArtifactState::Disabled`], whatever `s` contains. Otherwise the
    /// known labels map to their states, several legacy labels collapse into
    /// `Building`, and any unrecognised label yields `Declared`.
    pub fn from_build_state(s: &str, enabled: bool) -> Self {
        let parsed = match s {
            "ready" => Self::Ready,
            "building" => Self::Building,
            "catalog-derived" => Self::Building,
            "metadata-only" => Self::Building,
            "artifact-published" => Self::Building,
            "registry-loaded" => Self::Building,
            "stale" => Self::Stale,
            "failed" => Self::Failed,
            "requires_rebuild" => Self::RequiresRebuild,
            "requires-rebuild" => Self::RequiresRebuild,
            _ => Self::Declared,
        };
        if enabled {
            parsed
        } else {
            Self::Disabled
        }
    }

    /// Returns the canonical label used for storage and API surfaces.
    pub fn as_str(&self) -> &'static str {
        match *self {
            ArtifactState::Declared => "declared",
            ArtifactState::Building => "building",
            ArtifactState::Ready => "ready",
            ArtifactState::Disabled => "disabled",
            ArtifactState::Stale => "stale",
            ArtifactState::Failed => "failed",
            ArtifactState::RequiresRebuild => "requires_rebuild",
        }
    }

    /// Reports whether query reads are safe; only `Ready` qualifies.
    pub fn is_queryable(&self) -> bool {
        *self == Self::Ready
    }

    /// Reports whether a rebuild may be started from this state.
    pub fn can_rebuild(&self) -> bool {
        match self {
            Self::Declared | Self::Stale | Self::Failed | Self::RequiresRebuild => true,
            Self::Building | Self::Ready | Self::Disabled => false,
        }
    }

    /// Reports whether the artifact is in a state that needs attention.
    pub fn needs_attention(&self) -> bool {
        match self {
            Self::Failed | Self::RequiresRebuild | Self::Stale => true,
            Self::Declared | Self::Building | Self::Ready | Self::Disabled => false,
        }
    }
}

impl std::fmt::Display for ArtifactState {
    /// Writes the canonical label from [`ArtifactState::as_str`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = self.as_str();
        f.write_str(label)
    }
}
331
332#[derive(Debug, Clone)]
333pub struct PhysicalIndexState {
334    pub name: String,
335    pub kind: IndexKind,
336    pub collection: Option<String>,
337    pub enabled: bool,
338    pub entries: usize,
339    pub estimated_memory_bytes: u64,
340    pub last_refresh_ms: Option<u128>,
341    pub backend: String,
342    pub artifact_kind: Option<String>,
343    pub artifact_root_page: Option<u32>,
344    pub artifact_checksum: Option<u64>,
345    pub build_state: String,
346}
347
348impl PhysicalIndexState {
349    /// Canonical artifact lifecycle state derived from physical state.
350    pub fn artifact_state(&self) -> ArtifactState {
351        ArtifactState::from_build_state(&self.build_state, self.enabled)
352    }
353}
354
/// Summary record describing one export.
#[derive(Clone, Debug)]
pub struct ExportDescriptor {
    /// Export name.
    pub name: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Snapshot the export is tied to, if any.
    pub snapshot_id: Option<u64>,
    /// Superblock sequence number recorded with the export.
    pub superblock_sequence: u64,
    /// Path of the exported data, kept as a string.
    pub data_path: String,
    /// Path of the exported metadata, kept as a string.
    pub metadata_path: String,
    /// Number of collections recorded for the export.
    pub collection_count: usize,
    /// Total entity count recorded for the export.
    pub total_entities: usize,
}
366
/// Physical record describing one graph projection.
#[derive(Clone, Debug)]
pub struct PhysicalGraphProjection {
    /// Projection name.
    pub name: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last-update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
    /// State label, kept as a free-form string.
    pub state: String,
    /// Source label, kept as a free-form string.
    pub source: String,
    /// Node labels associated with the projection.
    pub node_labels: Vec<String>,
    /// Node types associated with the projection.
    pub node_types: Vec<String>,
    /// Edge labels associated with the projection.
    pub edge_labels: Vec<String>,
    /// Sequence at which the projection was last materialized, if ever.
    pub last_materialized_sequence: Option<u64>,
}
379
/// Physical record describing one analytics job.
#[derive(Clone, Debug)]
pub struct PhysicalAnalyticsJob {
    /// Job identifier.
    pub id: String,
    /// Job kind, kept as a free-form string.
    pub kind: String,
    /// State label, kept as a free-form string.
    pub state: String,
    /// Name of the projection the job is tied to, if any.
    pub projection: Option<String>,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last-update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
    /// Sequence of the last run, if the job has run.
    pub last_run_sequence: Option<u64>,
    /// Free-form string key/value metadata, ordered by key.
    pub metadata: BTreeMap<String, String>,
}
391
/// Physical record describing one tree definition inside a collection.
#[derive(Clone, Debug)]
pub struct PhysicalTreeDefinition {
    /// Collection the tree belongs to.
    pub collection: String,
    /// Tree name.
    pub name: String,
    /// Identifier of the root.
    pub root_id: u64,
    /// Default maximum number of children.
    pub default_max_children: usize,
    /// Whether children are ordered.
    pub ordered_children: bool,
    /// Ownership label, kept as a free-form string.
    pub ownership: String,
    /// Auto-fix mode label, kept as a free-form string.
    pub auto_fix_mode: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last-update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
}
404
405#[derive(Debug, Clone)]
406pub struct PhysicalMetadataFile {
407    pub protocol_version: String,
408    pub generated_at_unix_ms: u128,
409    pub last_loaded_from: Option<String>,
410    pub last_healed_at_unix_ms: Option<u128>,
411    pub manifest: SchemaManifest,
412    pub catalog: CatalogSnapshot,
413    pub manifest_events: Vec<ManifestEvent>,
414    pub indexes: Vec<PhysicalIndexState>,
415    pub graph_projections: Vec<PhysicalGraphProjection>,
416    pub analytics_jobs: Vec<PhysicalAnalyticsJob>,
417    pub tree_definitions: Vec<PhysicalTreeDefinition>,
418    pub collection_ttl_defaults_ms: BTreeMap<String, u64>,
419    pub collection_contracts: Vec<CollectionContract>,
420    pub exports: Vec<ExportDescriptor>,
421    pub superblock: SuperblockHeader,
422    pub snapshots: Vec<SnapshotDescriptor>,
423}
424
425mod helpers;
426mod json_codec;
427mod metadata_file;
428
429use self::helpers::*;
430use self::json_codec::*;