Skip to main content

reddb_server/
physical.rs

1//! Physical storage design primitives for RedDB's deterministic on-disk layout.
2
3use std::collections::BTreeMap;
4use std::fs;
5use std::io;
6use std::path::{Path, PathBuf};
7use std::time::{SystemTime, UNIX_EPOCH};
8
9use crate::api::{
10    CatalogSnapshot, CollectionStats, RedDBOptions, SchemaManifest, StorageMode,
11    REDDB_FORMAT_VERSION,
12};
13use crate::index::IndexKind;
14use crate::json::{from_slice, parse_json, to_vec};
15use crate::serde_json::{Map, Value as JsonValue};
16
/// Default grid block size, `512 * 1024` (524,288); used by `GridLayout::default`.
// NOTE(review): presumably a byte count — confirm against the block allocator.
pub const DEFAULT_GRID_BLOCK_SIZE: usize = 512 * 1024;
/// Default page size, `4096`; used by `GridLayout::default`.
// NOTE(review): presumably a byte count — confirm.
pub const DEFAULT_PAGE_SIZE: usize = 4096;
/// Default number of superblock copies; used by the `Default` impls of
/// `GridLayout` and `SuperblockHeader`.
pub const DEFAULT_SUPERBLOCK_COPIES: u8 = 4;
/// Protocol version identifier string for physical metadata.
pub const PHYSICAL_METADATA_PROTOCOL_VERSION: &str = "reddb-physical-v1";
/// Extension string for the binary physical-metadata file.
// NOTE(review): how this is joined onto a path is not visible in this section.
pub const PHYSICAL_METADATA_BINARY_EXTENSION: &str = "meta.rdbx";
/// Default manifest event history size.
// NOTE(review): presumably the maximum number of `ManifestEvent`s retained — confirm at the use site.
pub const DEFAULT_MANIFEST_EVENT_HISTORY: usize = 256;
/// Default metadata journal retention.
// NOTE(review): presumably a count of journal entries kept — confirm at the use site.
pub const DEFAULT_METADATA_JOURNAL_RETENTION: usize = 32;
24
/// The possible sources of physical metadata: `Binary`, `BinaryJournal`
/// and `Json`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PhysicalMetadataSource {
    Binary,
    BinaryJournal,
    Json,
}

impl PhysicalMetadataSource {
    /// Returns the fixed lowercase, snake_case label for this source
    /// (`"binary"`, `"binary_journal"` or `"json"`).
    pub fn as_str(self) -> &'static str {
        match self {
            PhysicalMetadataSource::Json => "json",
            PhysicalMetadataSource::BinaryJournal => "binary_journal",
            PhysicalMetadataSource::Binary => "binary",
        }
    }
}
/// Reference to a block, pairing a block index with a checksum.
///
/// The derived `Default` is index `0` with checksum `0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct BlockReference {
    /// Index of the referenced block.
    pub index: u64,
    /// Checksum stored alongside the index.
    // NOTE(review): presumably computed over the referenced block's contents;
    // the algorithm is not visible in this section — confirm.
    pub checksum: u128,
}
46
/// Pair of block references named `oldest` and `newest`; embedded in
/// `SuperblockHeader` as its `manifest` field.
// NOTE(review): presumably the two ends of the manifest block chain — confirm
// against the manifest writer.
#[derive(Debug, Clone, Default)]
pub struct ManifestPointers {
    /// Reference to the oldest block.
    pub oldest: BlockReference,
    /// Reference to the newest block.
    pub newest: BlockReference,
}
52
/// Superblock header contents.
///
/// The `Default` impl starts at sequence `0` with default (zeroed) block
/// references and an empty `collection_roots` map.
#[derive(Debug, Clone)]
pub struct SuperblockHeader {
    /// Format version; `Default` uses `crate::api::REDDB_FORMAT_VERSION`.
    pub format_version: u32,
    /// Sequence number; `0` in `Default`.
    // NOTE(review): presumably bumped on every superblock write — confirm.
    pub sequence: u64,
    /// Number of superblock copies; `Default` uses `DEFAULT_SUPERBLOCK_COPIES`.
    pub copies: u8,
    /// Oldest/newest manifest block references.
    pub manifest: ManifestPointers,
    /// Block reference for the free set.
    pub free_set: BlockReference,
    /// Map from a string key to a `u64`, ordered by key.
    // NOTE(review): presumably collection name -> root block/page id — confirm.
    pub collection_roots: BTreeMap<String, u64>,
}
62
63impl Default for SuperblockHeader {
64    fn default() -> Self {
65        Self {
66            format_version: crate::api::REDDB_FORMAT_VERSION,
67            sequence: 0,
68            copies: DEFAULT_SUPERBLOCK_COPIES,
69            manifest: ManifestPointers::default(),
70            free_set: BlockReference::default(),
71            collection_roots: BTreeMap::new(),
72        }
73    }
74}
75
/// Kind tag carried by a `ManifestEvent` (its `kind` field).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ManifestEventKind {
    /// An insert event.
    Insert,
    /// An update event.
    Update,
    /// A remove event.
    Remove,
    /// A checkpoint event.
    Checkpoint,
}
83
/// A single manifest event: what happened (`kind`) to which object
/// (`collection` + `object_key`), the block involved, and a snapshot range.
#[derive(Debug, Clone)]
pub struct ManifestEvent {
    /// Name of the collection the event belongs to.
    pub collection: String,
    /// Key of the affected object.
    pub object_key: String,
    /// Event kind.
    pub kind: ManifestEventKind,
    /// Block reference associated with the event.
    pub block: BlockReference,
    /// Lower bound of the snapshot range.
    // NOTE(review): presumably the first snapshot id in which this event is
    // visible — confirm.
    pub snapshot_min: u64,
    /// Optional upper bound of the snapshot range.
    // NOTE(review): presumably `None` means "still visible / open-ended" — confirm.
    pub snapshot_max: Option<u64>,
}
93
/// Compaction policy selector stored in `PhysicalLayout::compaction`.
///
/// `PhysicalLayout::from_options` always selects `Incremental`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionPolicy {
    /// Incremental compaction.
    Incremental,
    /// Manual compaction.
    // NOTE(review): presumably compaction runs only on explicit request — confirm.
    Manual,
}
99
/// WAL policy settings.
#[derive(Debug, Clone)]
pub struct WalPolicy {
    /// Auto-checkpoint threshold, in pages (per the field name).
    pub auto_checkpoint_pages: u32,
    /// Fsync-on-commit flag.
    pub fsync_on_commit: bool,
    /// Ring buffer size, in bytes (per the field name).
    pub ring_buffer_bytes: u64,
}

impl Default for WalPolicy {
    /// Defaults: 1000 auto-checkpoint pages, fsync on commit enabled, and a
    /// 64 MiB ring buffer.
    fn default() -> Self {
        const MIB: u64 = 1024 * 1024;

        WalPolicy {
            auto_checkpoint_pages: 1000,
            fsync_on_commit: true,
            ring_buffer_bytes: 64 * MIB,
        }
    }
}
116
/// Grid sizing parameters: block size, page size and superblock copy count.
///
/// The `Default` impl fills each field from the matching `DEFAULT_*` constant.
#[derive(Debug, Clone)]
pub struct GridLayout {
    /// Block size; defaults to `DEFAULT_GRID_BLOCK_SIZE`.
    pub block_size: usize,
    /// Page size; defaults to `DEFAULT_PAGE_SIZE`.
    pub page_size: usize,
    /// Number of superblock copies; defaults to `DEFAULT_SUPERBLOCK_COPIES`.
    pub superblock_copies: u8,
}
123
124impl Default for GridLayout {
125    fn default() -> Self {
126        Self {
127            block_size: DEFAULT_GRID_BLOCK_SIZE,
128            page_size: DEFAULT_PAGE_SIZE,
129            superblock_copies: DEFAULT_SUPERBLOCK_COPIES,
130        }
131    }
132}
133
/// Physical layout configuration: storage mode plus grid, WAL and compaction
/// settings. Built from runtime options by `PhysicalLayout::from_options`.
#[derive(Debug, Clone)]
pub struct PhysicalLayout {
    /// Storage mode; `is_persistent` compares it to `StorageMode::Persistent`.
    pub mode: StorageMode,
    /// Grid sizing parameters.
    pub grid: GridLayout,
    /// WAL policy settings.
    pub wal: WalPolicy,
    /// Compaction policy.
    pub compaction: CompactionPolicy,
}
141
142impl PhysicalLayout {
143    pub fn from_options(options: &RedDBOptions) -> Self {
144        Self {
145            mode: options.mode,
146            grid: GridLayout::default(),
147            wal: WalPolicy {
148                auto_checkpoint_pages: options.auto_checkpoint_pages,
149                ..WalPolicy::default()
150            },
151            compaction: CompactionPolicy::Incremental,
152        }
153    }
154
155    pub fn is_persistent(&self) -> bool {
156        self.mode == StorageMode::Persistent
157    }
158}
159
/// Descriptor for a snapshot: its id, creation time, the superblock sequence
/// it is tied to, and summary counts.
///
/// The derived `Default` zeroes every field.
#[derive(Debug, Clone, Default)]
pub struct SnapshotDescriptor {
    /// Snapshot identifier.
    pub snapshot_id: u64,
    /// Creation time in Unix milliseconds (per the field name).
    pub created_at_unix_ms: u128,
    /// Superblock sequence number recorded for this snapshot.
    pub superblock_sequence: u64,
    /// Number of collections.
    pub collection_count: usize,
    /// Total number of entities.
    pub total_entities: usize,
}
168
/// Origin tag for a collection contract: `Explicit`, `Implicit` or
/// `Migrated`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContractOrigin {
    Explicit,
    Implicit,
    Migrated,
}

impl ContractOrigin {
    /// Returns the fixed lowercase label for this origin (`"explicit"`,
    /// `"implicit"` or `"migrated"`).
    pub fn as_str(self) -> &'static str {
        match self {
            ContractOrigin::Migrated => "migrated",
            ContractOrigin::Implicit => "implicit",
            ContractOrigin::Explicit => "explicit",
        }
    }
}
185
/// Declared contract for a single column, as carried in
/// `CollectionContract::declared_columns`.
#[derive(Debug, Clone)]
pub struct DeclaredColumnContract {
    /// Column name.
    pub name: String,
    /// Data type, stored as a string.
    pub data_type: String,
    /// Optional structured SQL type name.
    // NOTE(review): how this relates to `data_type` when both are set is not
    // visible in this section — confirm.
    pub sql_type: Option<crate::storage::schema::SqlTypeName>,
    /// `NOT NULL` flag.
    pub not_null: bool,
    /// Optional default, stored as a string.
    pub default: Option<String>,
    /// Optional compression setting.
    // NOTE(review): meaning of the `u8` (level vs. codec id) is not visible
    // here — confirm.
    pub compress: Option<u8>,
    /// Uniqueness flag.
    pub unique: bool,
    /// Primary-key flag.
    pub primary_key: bool,
    /// Enum variant names.
    // NOTE(review): presumably empty for non-enum columns — confirm.
    pub enum_variants: Vec<String>,
    /// Optional array element type, stored as a string.
    pub array_element: Option<String>,
    /// Optional decimal precision.
    pub decimal_precision: Option<u8>,
}
200
/// Catalog contract for one collection: its declared model and schema mode,
/// provenance, versioning/timestamps, declared columns, and the feature
/// switches documented on the individual fields below.
#[derive(Debug, Clone)]
pub struct CollectionContract {
    /// Collection name.
    pub name: String,
    /// Declared collection model.
    pub declared_model: crate::catalog::CollectionModel,
    /// Schema mode for the collection.
    pub schema_mode: crate::catalog::SchemaMode,
    /// How the contract came to exist (explicit, implicit or migrated).
    pub origin: ContractOrigin,
    /// Contract version number.
    pub version: u32,
    /// Creation time in Unix milliseconds (per the field name).
    pub created_at_unix_ms: u128,
    /// Last-update time in Unix milliseconds (per the field name).
    pub updated_at_unix_ms: u128,
    /// Optional default TTL in milliseconds (per the field name).
    pub default_ttl_ms: Option<u64>,
    /// Field names selected for the context index; see
    /// `context_index_enabled` below.
    pub context_index_fields: Vec<String>,
    /// Per-column declared contracts.
    pub declared_columns: Vec<DeclaredColumnContract>,
    /// Optional table definition.
    pub table_def: Option<crate::storage::schema::TableDef>,
    /// Enabled by `CREATE TABLE ... WITH timestamps = true`. When true,
    /// the runtime auto-populates two user-visible columns
    /// `created_at` + `updated_at` (BIGINT unix-ms) sourced from the
    /// `UnifiedEntity::created_at/updated_at` fields. `created_at` is
    /// immutable after insert; `updated_at` is bumped on every mutation.
    pub timestamps_enabled: bool,
    /// Enabled by `CREATE TABLE ... WITH context_index = true` (or by
    /// naming specific `context_index_fields`). When true, every INSERT
    /// tokenises the row's text fields and populates the global context
    /// index that backs `SEARCH CONTEXT` / `SEARCH SIMILAR TEXT` / `ASK`
    /// (RAG). When false (default), inserts skip the tokenisation +
    /// 3-way RwLock write storm entirely — ~800 ns faster per insert,
    /// and SEARCH returns empty for this collection.
    ///
    /// Opt-in by design: pure OLTP tables (accounts, orders, events)
    /// pay zero indexing tax; search-oriented tables (articles, docs)
    /// flip the switch at CREATE time.
    pub context_index_enabled: bool,
    /// Enabled by `CREATE TABLE ... APPEND ONLY` or `WITH
    /// (append_only = true)`. When true, the runtime rejects
    /// `UPDATE` and `DELETE` against this collection at parse time
    /// with a clear error — the operator's immutability intent
    /// becomes a first-class catalog fact rather than an RLS-shaped
    /// approximation. Default `false` so legacy DDL keeps its
    /// mutable semantics.
    pub append_only: bool,
    /// Declarative subscriptions created by `WITH EVENTS`. This is
    /// metadata only in #291; event emission is wired by the outbox slice.
    pub subscriptions: Vec<crate::catalog::SubscriptionDescriptor>,
}
244
/// Lifecycle states an artifact can be in.
///
/// Transition diagram:
/// ```text
///   Declared ──► Building ──► Ready ──► Stale ──► RequiresRebuild
///       │            │          │                       │
///       │            ▼          ▼                       │
///       │         Failed    Disabled                    │
///       │            │                                  │
///       └────────────┴──────────────────────────────────┘
///                    (rebuild restarts from Building)
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArtifactState {
    /// Declared, but nothing has been materialized yet.
    Declared,
    /// A build or rebuild is in progress.
    Building,
    /// Materialized and available to queries.
    Ready,
    /// Switched off explicitly by the operator.
    Disabled,
    /// The underlying data moved on; the artifact is out of date.
    Stale,
    /// A build or warmup failed; may need manual intervention.
    Failed,
    /// Cannot serve reads again until it is rebuilt.
    RequiresRebuild,
}

impl ArtifactState {
    /// Maps a legacy build-state string from physical metadata, together with
    /// the `enabled` flag, onto a lifecycle state.
    ///
    /// `enabled == false` always yields `Disabled`, whatever `s` says.
    /// Otherwise the recognised labels are matched exactly (case-sensitive)
    /// and anything unrecognised falls back to `Declared`.
    pub fn from_build_state(s: &str, enabled: bool) -> Self {
        match (enabled, s) {
            (false, _) => Self::Disabled,
            (true, "ready") => Self::Ready,
            (
                true,
                "building" | "catalog-derived" | "metadata-only" | "artifact-published"
                | "registry-loaded",
            ) => Self::Building,
            (true, "stale") => Self::Stale,
            (true, "failed") => Self::Failed,
            (true, "requires_rebuild" | "requires-rebuild") => Self::RequiresRebuild,
            (true, _) => Self::Declared,
        }
    }

    /// Canonical label used for storage and API surfaces.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Declared => "declared",
            Self::Building => "building",
            Self::Ready => "ready",
            Self::Disabled => "disabled",
            Self::Stale => "stale",
            Self::Failed => "failed",
            Self::RequiresRebuild => "requires_rebuild",
        }
    }

    /// `true` only for `Ready`, the single state that is safe for query reads.
    pub fn is_queryable(&self) -> bool {
        *self == Self::Ready
    }

    /// `true` for the states a rebuild may start from: `Declared`, `Stale`,
    /// `Failed` and `RequiresRebuild`.
    pub fn can_rebuild(&self) -> bool {
        match self {
            Self::Declared | Self::Stale | Self::Failed | Self::RequiresRebuild => true,
            Self::Building | Self::Ready | Self::Disabled => false,
        }
    }

    /// `true` for the states that need attention: `Failed`,
    /// `RequiresRebuild` and `Stale`.
    pub fn needs_attention(&self) -> bool {
        match self {
            Self::Failed | Self::RequiresRebuild | Self::Stale => true,
            Self::Declared | Self::Building | Self::Ready | Self::Disabled => false,
        }
    }
}

impl std::fmt::Display for ArtifactState {
    /// Writes the canonical label from `as_str` verbatim; formatter width and
    /// padding flags are not applied.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = self.as_str();
        out.write_str(label)
    }
}
329
/// Persisted state of one index: identity, enablement, size figures, backing
/// artifact details and a string build state.
///
/// `artifact_state` combines `build_state` and `enabled` into an
/// `ArtifactState`.
#[derive(Debug, Clone)]
pub struct PhysicalIndexState {
    /// Index name.
    pub name: String,
    /// Index kind.
    pub kind: IndexKind,
    /// Optional collection name.
    // NOTE(review): presumably `None` for indexes not tied to one collection — confirm.
    pub collection: Option<String>,
    /// Enabled flag; when `false`, `artifact_state` reports `Disabled`.
    pub enabled: bool,
    /// Number of entries.
    pub entries: usize,
    /// Estimated memory footprint in bytes (per the field name).
    pub estimated_memory_bytes: u64,
    /// Optional last-refresh value in milliseconds (per the field name).
    // NOTE(review): unclear from here whether this is a timestamp or a duration — confirm.
    pub last_refresh_ms: Option<u128>,
    /// Backend identifier, stored as a string.
    pub backend: String,
    /// Optional artifact kind, stored as a string.
    pub artifact_kind: Option<String>,
    /// Optional root page of the artifact.
    pub artifact_root_page: Option<u32>,
    /// Optional artifact checksum.
    pub artifact_checksum: Option<u64>,
    /// Build state string, parsed by `ArtifactState::from_build_state`.
    pub build_state: String,
}
345
346impl PhysicalIndexState {
347    /// Canonical artifact lifecycle state derived from physical state.
348    pub fn artifact_state(&self) -> ArtifactState {
349        ArtifactState::from_build_state(&self.build_state, self.enabled)
350    }
351}
352
/// Descriptor for a named export: when it was created, what it was taken
/// from, where its files live, and summary counts.
#[derive(Debug, Clone)]
pub struct ExportDescriptor {
    /// Export name.
    pub name: String,
    /// Creation time in Unix milliseconds (per the field name).
    pub created_at_unix_ms: u128,
    /// Optional snapshot identifier.
    pub snapshot_id: Option<u64>,
    /// Superblock sequence number recorded for the export.
    pub superblock_sequence: u64,
    /// Data path, stored as a string.
    pub data_path: String,
    /// Metadata path, stored as a string.
    pub metadata_path: String,
    /// Number of collections.
    pub collection_count: usize,
    /// Total number of entities.
    pub total_entities: usize,
}
364
/// Persisted description of a named graph projection.
#[derive(Debug, Clone)]
pub struct PhysicalGraphProjection {
    /// Projection name.
    pub name: String,
    /// Creation time in Unix milliseconds (per the field name).
    pub created_at_unix_ms: u128,
    /// Last-update time in Unix milliseconds (per the field name).
    pub updated_at_unix_ms: u128,
    /// State, stored as a free-form string.
    // NOTE(review): the set of valid values is not visible in this section.
    pub state: String,
    /// Source, stored as a string.
    pub source: String,
    /// Node labels.
    pub node_labels: Vec<String>,
    /// Node types.
    pub node_types: Vec<String>,
    /// Edge labels.
    pub edge_labels: Vec<String>,
    /// Optional sequence number of the last materialization.
    pub last_materialized_sequence: Option<u64>,
}
377
/// Persisted description of an analytics job.
#[derive(Debug, Clone)]
pub struct PhysicalAnalyticsJob {
    /// Job identifier.
    pub id: String,
    /// Job kind, stored as a string.
    pub kind: String,
    /// Job state, stored as a free-form string.
    // NOTE(review): the set of valid values is not visible in this section.
    pub state: String,
    /// Optional projection name.
    // NOTE(review): presumably names a `PhysicalGraphProjection` — confirm.
    pub projection: Option<String>,
    /// Creation time in Unix milliseconds (per the field name).
    pub created_at_unix_ms: u128,
    /// Last-update time in Unix milliseconds (per the field name).
    pub updated_at_unix_ms: u128,
    /// Optional sequence number of the last run.
    pub last_run_sequence: Option<u64>,
    /// String key/value metadata, ordered by key.
    pub metadata: BTreeMap<String, String>,
}
389
/// Persisted definition of a named tree within a collection.
#[derive(Debug, Clone)]
pub struct PhysicalTreeDefinition {
    /// Collection name.
    pub collection: String,
    /// Tree name.
    pub name: String,
    /// Identifier of the root.
    pub root_id: u64,
    /// Default maximum number of children.
    pub default_max_children: usize,
    /// Ordered-children flag.
    pub ordered_children: bool,
    /// Ownership setting, stored as a string.
    // NOTE(review): the set of valid values is not visible in this section.
    pub ownership: String,
    /// Auto-fix mode, stored as a string.
    // NOTE(review): the set of valid values is not visible in this section.
    pub auto_fix_mode: String,
    /// Creation time in Unix milliseconds (per the field name).
    pub created_at_unix_ms: u128,
    /// Last-update time in Unix milliseconds (per the field name).
    pub updated_at_unix_ms: u128,
}
402
/// Top-level physical metadata document: schema manifest, catalog snapshot,
/// and the lists of events, indexes, projections, jobs, tree definitions,
/// contracts, exports and snapshots, plus the superblock header.
#[derive(Debug, Clone)]
pub struct PhysicalMetadataFile {
    /// Protocol version string.
    // NOTE(review): presumably `PHYSICAL_METADATA_PROTOCOL_VERSION` when written
    // by this build — confirm at the writer.
    pub protocol_version: String,
    /// Generation time in Unix milliseconds (per the field name).
    pub generated_at_unix_ms: u128,
    /// Optional label of where the metadata was last loaded from.
    // NOTE(review): presumably a `PhysicalMetadataSource::as_str` value — confirm.
    pub last_loaded_from: Option<String>,
    /// Optional time of the last heal, in Unix milliseconds (per the field name).
    pub last_healed_at_unix_ms: Option<u128>,
    /// Schema manifest.
    pub manifest: SchemaManifest,
    /// Catalog snapshot.
    pub catalog: CatalogSnapshot,
    /// Manifest events.
    pub manifest_events: Vec<ManifestEvent>,
    /// Index states.
    pub indexes: Vec<PhysicalIndexState>,
    /// Graph projections.
    pub graph_projections: Vec<PhysicalGraphProjection>,
    /// Analytics jobs.
    pub analytics_jobs: Vec<PhysicalAnalyticsJob>,
    /// Tree definitions.
    pub tree_definitions: Vec<PhysicalTreeDefinition>,
    /// Default TTLs in milliseconds (per the field name), keyed by string.
    // NOTE(review): presumably keyed by collection name — confirm.
    pub collection_ttl_defaults_ms: BTreeMap<String, u64>,
    /// Collection contracts.
    pub collection_contracts: Vec<CollectionContract>,
    /// Export descriptors.
    pub exports: Vec<ExportDescriptor>,
    /// Superblock header.
    pub superblock: SuperblockHeader,
    /// Snapshot descriptors.
    pub snapshots: Vec<SnapshotDescriptor>,
}
422
423mod helpers;
424mod json_codec;
425mod metadata_file;
426
427use self::helpers::*;
428use self::json_codec::*;