Skip to main content

reddb_server/
physical.rs

1//! Physical storage design primitives for RedDB's deterministic on-disk layout.
2
3use std::collections::BTreeMap;
4use std::fs;
5use std::io;
6use std::path::{Path, PathBuf};
7use std::time::{SystemTime, UNIX_EPOCH};
8
9use crate::api::{
10    CatalogSnapshot, CollectionStats, RedDBOptions, SchemaManifest, StorageMode,
11    REDDB_FORMAT_VERSION,
12};
13use crate::index::IndexKind;
14use crate::json::{from_slice, parse_json, to_vec};
15use crate::serde_json::{Map, Value as JsonValue};
16
// --- Grid layout defaults -------------------------------------------------

/// Default for [`GridLayout::block_size`]: 512 * 1024 (presumably bytes, i.e. 512 KiB).
pub const DEFAULT_GRID_BLOCK_SIZE: usize = 512 * 1024;
/// Default for [`GridLayout::page_size`]: 4096 (presumably bytes).
pub const DEFAULT_PAGE_SIZE: usize = 4 * 1024;
/// Default superblock copy count, used by both [`GridLayout`] and [`SuperblockHeader`].
pub const DEFAULT_SUPERBLOCK_COPIES: u8 = 4;

// --- Physical metadata identity -------------------------------------------

/// Protocol version tag for physical metadata.
///
/// NOTE(review): presumably the value written to
/// `PhysicalMetadataFile::protocol_version`; the writer is not in this part of the file.
pub const PHYSICAL_METADATA_PROTOCOL_VERSION: &str = "reddb-physical-v1";
/// File extension (without a leading dot) for the binary metadata representation.
pub const PHYSICAL_METADATA_BINARY_EXTENSION: &str = "meta.rdbx";

// --- Retention defaults -----------------------------------------------------

/// Default manifest event history length.
///
/// NOTE(review): presumably caps `PhysicalMetadataFile::manifest_events`; confirm at the use site.
pub const DEFAULT_MANIFEST_EVENT_HISTORY: usize = 256;
/// Default metadata journal retention count.
///
/// NOTE(review): the unit (files vs. entries) is not visible here; confirm at the use site.
pub const DEFAULT_METADATA_JOURNAL_RETENTION: usize = 32;
24
/// Kinds of physical metadata source.
///
/// NOTE(review): presumably records which on-disk representation a metadata
/// file was loaded from; the loader is not visible in this part of the file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PhysicalMetadataSource {
    /// Binary representation.
    Binary,
    /// Binary journal representation.
    BinaryJournal,
    /// JSON representation.
    Json,
}

impl PhysicalMetadataSource {
    /// Returns the stable lowercase label for this source:
    /// `"binary"`, `"binary_journal"` or `"json"`.
    pub fn as_str(self) -> &'static str {
        match self {
            PhysicalMetadataSource::Json => "json",
            PhysicalMetadataSource::BinaryJournal => "binary_journal",
            PhysicalMetadataSource::Binary => "binary",
        }
    }
}
/// Reference to a single block: its `index` paired with a `checksum`.
///
/// The derived `Default` is index `0` with checksum `0`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct BlockReference {
    /// Index of the referenced block.
    pub index: u64,
    /// Checksum recorded for the referenced block (the algorithm is not defined here).
    pub checksum: u128,
}
46
47#[derive(Debug, Clone, Default)]
48pub struct ManifestPointers {
49    pub oldest: BlockReference,
50    pub newest: BlockReference,
51}
52
53#[derive(Debug, Clone)]
54pub struct SuperblockHeader {
55    pub format_version: u32,
56    pub sequence: u64,
57    pub copies: u8,
58    pub manifest: ManifestPointers,
59    pub free_set: BlockReference,
60    pub collection_roots: BTreeMap<String, u64>,
61}
62
63impl Default for SuperblockHeader {
64    fn default() -> Self {
65        Self {
66            format_version: crate::api::REDDB_FORMAT_VERSION,
67            sequence: 0,
68            copies: DEFAULT_SUPERBLOCK_COPIES,
69            manifest: ManifestPointers::default(),
70            free_set: BlockReference::default(),
71            collection_roots: BTreeMap::new(),
72        }
73    }
74}
75
/// Kind tag carried by a [`ManifestEvent`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ManifestEventKind {
    /// An insert event.
    Insert,
    /// An update event.
    Update,
    /// A remove event.
    Remove,
    /// A checkpoint event.
    Checkpoint,
}
83
84#[derive(Debug, Clone)]
85pub struct ManifestEvent {
86    pub collection: String,
87    pub object_key: String,
88    pub kind: ManifestEventKind,
89    pub block: BlockReference,
90    pub snapshot_min: u64,
91    pub snapshot_max: Option<u64>,
92}
93
/// Compaction strategy selected for a [`PhysicalLayout`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CompactionPolicy {
    /// Incremental compaction; the value chosen by `PhysicalLayout::from_options`.
    Incremental,
    /// Manual compaction.
    Manual,
}
99
/// Write-ahead-log tuning knobs.
#[derive(Clone, Debug)]
pub struct WalPolicy {
    /// Page-count threshold for automatic checkpoints; defaults to `1000`.
    pub auto_checkpoint_pages: u32,
    /// Whether to fsync on commit; defaults to `true`.
    pub fsync_on_commit: bool,
    /// Ring buffer size in bytes; defaults to 64 MiB.
    pub ring_buffer_bytes: u64,
}

impl Default for WalPolicy {
    /// Returns the stock policy: 1000 checkpoint pages, fsync on commit,
    /// and a 64 MiB ring buffer.
    fn default() -> Self {
        const MIB: u64 = 1024 * 1024;
        WalPolicy {
            ring_buffer_bytes: 64 * MIB,
            fsync_on_commit: true,
            auto_checkpoint_pages: 1000,
        }
    }
}
116
117#[derive(Debug, Clone)]
118pub struct GridLayout {
119    pub block_size: usize,
120    pub page_size: usize,
121    pub superblock_copies: u8,
122}
123
124impl Default for GridLayout {
125    fn default() -> Self {
126        Self {
127            block_size: DEFAULT_GRID_BLOCK_SIZE,
128            page_size: DEFAULT_PAGE_SIZE,
129            superblock_copies: DEFAULT_SUPERBLOCK_COPIES,
130        }
131    }
132}
133
134#[derive(Debug, Clone)]
135pub struct PhysicalLayout {
136    pub mode: StorageMode,
137    pub grid: GridLayout,
138    pub wal: WalPolicy,
139    pub compaction: CompactionPolicy,
140}
141
142impl PhysicalLayout {
143    pub fn from_options(options: &RedDBOptions) -> Self {
144        Self {
145            mode: options.mode,
146            grid: GridLayout::default(),
147            wal: WalPolicy {
148                auto_checkpoint_pages: options.auto_checkpoint_pages,
149                ..WalPolicy::default()
150            },
151            compaction: CompactionPolicy::Incremental,
152        }
153    }
154
155    pub fn is_persistent(&self) -> bool {
156        self.mode == StorageMode::Persistent
157    }
158}
159
/// Summary record for one snapshot.
///
/// The derived `Default` zeroes every field.
#[derive(Clone, Debug, Default)]
pub struct SnapshotDescriptor {
    /// Identifier of the snapshot.
    pub snapshot_id: u64,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Superblock sequence number associated with the snapshot.
    pub superblock_sequence: u64,
    /// Number of collections recorded for the snapshot.
    pub collection_count: usize,
    /// Total entity count recorded for the snapshot.
    pub total_entities: usize,
}
168
/// How a [`CollectionContract`] came into existence.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ContractOrigin {
    /// Labelled `"explicit"`.
    Explicit,
    /// Labelled `"implicit"`.
    Implicit,
    /// Labelled `"migrated"`.
    Migrated,
}

impl ContractOrigin {
    /// Returns the stable lowercase label for this origin:
    /// `"explicit"`, `"implicit"` or `"migrated"`.
    pub fn as_str(self) -> &'static str {
        match self {
            ContractOrigin::Migrated => "migrated",
            ContractOrigin::Implicit => "implicit",
            ContractOrigin::Explicit => "explicit",
        }
    }
}
185
186#[derive(Debug, Clone)]
187pub struct DeclaredColumnContract {
188    pub name: String,
189    pub data_type: String,
190    pub sql_type: Option<crate::storage::schema::SqlTypeName>,
191    pub not_null: bool,
192    pub default: Option<String>,
193    pub compress: Option<u8>,
194    pub unique: bool,
195    pub primary_key: bool,
196    pub enum_variants: Vec<String>,
197    pub array_element: Option<String>,
198    pub decimal_precision: Option<u8>,
199}
200
201#[derive(Debug, Clone)]
202pub struct CollectionContract {
203    pub name: String,
204    pub declared_model: crate::catalog::CollectionModel,
205    pub schema_mode: crate::catalog::SchemaMode,
206    pub origin: ContractOrigin,
207    pub version: u32,
208    pub created_at_unix_ms: u128,
209    pub updated_at_unix_ms: u128,
210    pub default_ttl_ms: Option<u64>,
211    pub vector_dimension: Option<usize>,
212    pub vector_metric: Option<crate::storage::engine::distance::DistanceMetric>,
213    pub context_index_fields: Vec<String>,
214    pub declared_columns: Vec<DeclaredColumnContract>,
215    pub table_def: Option<crate::storage::schema::TableDef>,
216    /// Enabled by `CREATE TABLE ... WITH timestamps = true`. When true,
217    /// the runtime auto-populates two user-visible columns
218    /// `created_at` + `updated_at` (BIGINT unix-ms) sourced from the
219    /// `UnifiedEntity::created_at/updated_at` fields. `created_at` is
220    /// immutable after insert; `updated_at` is bumped on every mutation.
221    pub timestamps_enabled: bool,
222    /// Enabled by `CREATE TABLE ... WITH context_index = true` (or by
223    /// naming specific `context_index_fields`). When true, every INSERT
224    /// tokenises the row's text fields and populates the global context
225    /// index that backs `SEARCH CONTEXT` / `SEARCH SIMILAR TEXT` / `ASK`
226    /// (RAG). When false (default), inserts skip the tokenisation +
227    /// 3-way RwLock write storm entirely — ~800 ns faster per insert,
228    /// and SEARCH returns empty for this collection.
229    ///
230    /// Opt-in by design: pure OLTP tables (accounts, orders, events)
231    /// pay zero indexing tax; search-oriented tables (articles, docs)
232    /// flip the switch at CREATE time.
233    pub context_index_enabled: bool,
234    /// Metrics collections are backed by time-series storage but carry a
235    /// metrics-specific raw sample retention contract.
236    pub metrics_raw_retention_ms: Option<u64>,
237    /// Metrics rollup tiers declared by `CREATE METRICS ... DOWNSAMPLE`.
238    pub metrics_rollup_policies: Vec<String>,
239    /// Metrics tenant identity source. Defaults to current tenant context and
240    /// can be declared as a stable identity path for future ingestion slices.
241    pub metrics_tenant_identity: Option<String>,
242    /// Metrics namespace identity. v0 starts with a default namespace so
243    /// series identity is namespace-aware before Prometheus ingestion exists.
244    pub metrics_namespace: Option<String>,
245    /// Enabled by `CREATE TABLE ... APPEND ONLY` or `WITH
246    /// (append_only = true)`. When true, the runtime rejects
247    /// `UPDATE` and `DELETE` against this collection at parse time
248    /// with a clear error — the operator's immutability intent
249    /// becomes a first-class catalog fact rather than an RLS-shaped
250    /// approximation. Default `false` so legacy DDL keeps its
251    /// mutable semantics.
252    pub append_only: bool,
253    /// Declarative subscriptions created by `WITH EVENTS`. This is
254    /// metadata only in #291; event emission is wired by the outbox slice.
255    pub subscriptions: Vec<crate::catalog::SubscriptionDescriptor>,
256}
257
/// Canonical lifecycle states of an index artifact.
///
/// State machine transitions:
/// ```text
///   Declared ──► Building ──► Ready ──► Stale ──► RequiresRebuild
///       │            │          │                       │
///       │            ▼          ▼                       │
///       │         Failed    Disabled                    │
///       │            │                                  │
///       └────────────┴──────────────────────────────────┘
///                    (rebuild restarts from Building)
/// ```
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ArtifactState {
    /// The index is declared but has never been materialized.
    Declared,
    /// The artifact is currently being built or rebuilt.
    Building,
    /// The artifact is materialized and can be queried.
    Ready,
    /// The operator explicitly disabled the artifact.
    Disabled,
    /// The underlying data changed, so the artifact is out of date.
    Stale,
    /// Build or warmup failed; manual intervention may be needed.
    Failed,
    /// The artifact must be rebuilt before it can serve reads.
    RequiresRebuild,
}

impl ArtifactState {
    /// Maps the legacy string stored in physical metadata to a state.
    ///
    /// `enabled == false` always yields [`ArtifactState::Disabled`],
    /// whatever `s` contains. Otherwise the string is matched exactly
    /// (case-sensitive), and any unrecognised value, including
    /// `"disabled"` and `"declared"`, yields [`ArtifactState::Declared`].
    pub fn from_build_state(s: &str, enabled: bool) -> Self {
        match (enabled, s) {
            // The enabled flag takes precedence over the stored build state.
            (false, _) => Self::Disabled,
            (true, "ready") => Self::Ready,
            (
                true,
                "building" | "catalog-derived" | "metadata-only" | "artifact-published"
                | "registry-loaded",
            ) => Self::Building,
            (true, "stale") => Self::Stale,
            (true, "failed") => Self::Failed,
            // Both spellings are accepted; `as_str` emits the underscore form.
            (true, "requires_rebuild" | "requires-rebuild") => Self::RequiresRebuild,
            (true, _) => Self::Declared,
        }
    }

    /// Returns the canonical label used for storage and API surfaces.
    pub fn as_str(&self) -> &'static str {
        match *self {
            ArtifactState::RequiresRebuild => "requires_rebuild",
            ArtifactState::Failed => "failed",
            ArtifactState::Stale => "stale",
            ArtifactState::Disabled => "disabled",
            ArtifactState::Ready => "ready",
            ArtifactState::Building => "building",
            ArtifactState::Declared => "declared",
        }
    }

    /// Returns `true` only for [`ArtifactState::Ready`], the single state
    /// that is safe for query reads.
    pub fn is_queryable(&self) -> bool {
        *self == Self::Ready
    }

    /// Returns `true` when a rebuild may be started from this state.
    pub fn can_rebuild(&self) -> bool {
        match self {
            Self::Declared | Self::Stale | Self::Failed | Self::RequiresRebuild => true,
            Self::Building | Self::Ready | Self::Disabled => false,
        }
    }

    /// Returns `true` when the artifact is failed, stale, or awaiting a rebuild.
    pub fn needs_attention(&self) -> bool {
        match self {
            Self::Failed | Self::RequiresRebuild | Self::Stale => true,
            Self::Declared | Self::Building | Self::Ready | Self::Disabled => false,
        }
    }
}

impl std::fmt::Display for ArtifactState {
    /// Writes the canonical label from [`ArtifactState::as_str`] verbatim;
    /// width and padding flags on the formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = self.as_str();
        f.write_str(label)
    }
}
342
343#[derive(Debug, Clone)]
344pub struct PhysicalIndexState {
345    pub name: String,
346    pub kind: IndexKind,
347    pub collection: Option<String>,
348    pub enabled: bool,
349    pub entries: usize,
350    pub estimated_memory_bytes: u64,
351    pub last_refresh_ms: Option<u128>,
352    pub backend: String,
353    pub artifact_kind: Option<String>,
354    pub artifact_root_page: Option<u32>,
355    pub artifact_checksum: Option<u64>,
356    pub build_state: String,
357}
358
359impl PhysicalIndexState {
360    /// Canonical artifact lifecycle state derived from physical state.
361    pub fn artifact_state(&self) -> ArtifactState {
362        ArtifactState::from_build_state(&self.build_state, self.enabled)
363    }
364}
365
/// Record describing one named export.
#[derive(Clone, Debug)]
pub struct ExportDescriptor {
    /// Export name.
    pub name: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Snapshot the export is associated with, if any.
    pub snapshot_id: Option<u64>,
    /// Superblock sequence number associated with the export.
    pub superblock_sequence: u64,
    /// Path of the exported data, stored as a string.
    pub data_path: String,
    /// Path of the exported metadata, stored as a string.
    pub metadata_path: String,
    /// Number of collections recorded for the export.
    pub collection_count: usize,
    /// Total entity count recorded for the export.
    pub total_entities: usize,
}
377
/// Persisted description of a named graph projection.
#[derive(Clone, Debug)]
pub struct PhysicalGraphProjection {
    /// Projection name.
    pub name: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
    /// State label, stored as a free-form string.
    pub state: String,
    /// Source label, stored as a free-form string.
    pub source: String,
    /// Node labels included in the projection.
    pub node_labels: Vec<String>,
    /// Node types included in the projection.
    pub node_types: Vec<String>,
    /// Edge labels included in the projection.
    pub edge_labels: Vec<String>,
    /// Sequence number of the last materialization, if any.
    pub last_materialized_sequence: Option<u64>,
}
390
/// Persisted description of an analytics job.
#[derive(Clone, Debug)]
pub struct PhysicalAnalyticsJob {
    /// Job identifier.
    pub id: String,
    /// Job kind label, stored as a free-form string.
    pub kind: String,
    /// State label, stored as a free-form string.
    pub state: String,
    /// Name of the projection the job refers to, if any.
    pub projection: Option<String>,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
    /// Sequence number of the last run, if the job has run.
    pub last_run_sequence: Option<u64>,
    /// Free-form string key/value metadata, kept sorted by key.
    pub metadata: BTreeMap<String, String>,
}
402
/// Persisted definition of a named tree inside a collection.
#[derive(Clone, Debug)]
pub struct PhysicalTreeDefinition {
    /// Collection that owns the tree.
    pub collection: String,
    /// Tree name.
    pub name: String,
    /// Identifier of the tree's root.
    pub root_id: u64,
    /// Default maximum number of children per node.
    pub default_max_children: usize,
    /// Whether children are kept in order.
    pub ordered_children: bool,
    /// Ownership mode, stored as a free-form string.
    pub ownership: String,
    /// Auto-fix mode, stored as a free-form string.
    pub auto_fix_mode: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
}
415
416#[derive(Debug, Clone)]
417pub struct PhysicalMetadataFile {
418    pub protocol_version: String,
419    pub generated_at_unix_ms: u128,
420    pub last_loaded_from: Option<String>,
421    pub last_healed_at_unix_ms: Option<u128>,
422    pub manifest: SchemaManifest,
423    pub catalog: CatalogSnapshot,
424    pub manifest_events: Vec<ManifestEvent>,
425    pub indexes: Vec<PhysicalIndexState>,
426    pub graph_projections: Vec<PhysicalGraphProjection>,
427    pub analytics_jobs: Vec<PhysicalAnalyticsJob>,
428    pub tree_definitions: Vec<PhysicalTreeDefinition>,
429    pub collection_ttl_defaults_ms: BTreeMap<String, u64>,
430    pub collection_contracts: Vec<CollectionContract>,
431    pub exports: Vec<ExportDescriptor>,
432    pub superblock: SuperblockHeader,
433    pub snapshots: Vec<SnapshotDescriptor>,
434}
435
436mod helpers;
437mod json_codec;
438mod metadata_file;
439
440use self::helpers::*;
441use self::json_codec::*;