Skip to main content

reddb_server/
physical.rs

1//! Physical storage design primitives for RedDB's deterministic on-disk layout.
2
3use std::collections::BTreeMap;
4use std::fs;
5use std::io;
6use std::path::{Path, PathBuf};
7use std::time::{SystemTime, UNIX_EPOCH};
8
9use crate::api::{
10    CatalogSnapshot, CollectionStats, RedDBOptions, SchemaManifest, StorageMode,
11    REDDB_FORMAT_VERSION,
12};
13use crate::index::IndexKind;
14use crate::json::{from_slice, parse_json, to_vec};
15use crate::serde_json::{Map, Value as JsonValue};
16
/// Default grid block size (`512 * 1024`; presumably bytes, i.e. 512 KiB).
pub const DEFAULT_GRID_BLOCK_SIZE: usize = 512 * 1024;
/// Default page size (`4096`; presumably bytes).
pub const DEFAULT_PAGE_SIZE: usize = 4096;
/// Default number of superblock copies.
pub const DEFAULT_SUPERBLOCK_COPIES: u8 = 4;
/// Protocol version tag for the physical metadata format.
pub const PHYSICAL_METADATA_PROTOCOL_VERSION: &str = "reddb-physical-v1";
/// Extension of the binary physical metadata file (`<data>.meta.rdbx`).
pub const PHYSICAL_METADATA_BINARY_EXTENSION: &str = "meta.rdbx";
/// Default manifest event history size (presumably the number of events kept).
pub const DEFAULT_MANIFEST_EVENT_HISTORY: usize = 256;
/// Seq-N catalog journal retention used when the journal is enabled at the
/// `Max` tier. See [`seqn_journal_retention`].
pub const DEFAULT_METADATA_JOURNAL_RETENTION: usize = 32;
/// Seq-N catalog journal retention used when the journal is opted into
/// outside of the `Max` tier — keeps the forensics surface minimal on lower
/// tiers.
pub const OPT_IN_METADATA_JOURNAL_RETENTION: usize = 4;
29
30use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering};
31
// JSON sidecar policy, stored as a tri-state byte:
//   0 = unset (consult env, default off),
//   1 = enabled,
//   2 = disabled.
// Threaded as a process-global because the metadata save path is reached
// from many call sites that do not currently carry a layout handle. Tier
// wiring (#469/#471/#472) flips this on at startup for `Max`;
// minimal/standard/performance leave it off and emit only the binary
// `<data>.meta.rdbx` + journal entries.
static META_JSON_SIDECAR_POLICY: AtomicU8 = AtomicU8::new(0);

/// Process-wide opt-in for the legacy `<data>.meta.json` sidecar.
///
/// Call once at startup after resolving the active [`StorageLayout`]. Once
/// called (with either value) the stored policy takes precedence over the
/// `REDDB_META_JSON_SIDECAR` environment variable.
pub fn set_meta_json_sidecar_enabled(enabled: bool) {
    let policy: u8 = if enabled { 1 } else { 2 };
    META_JSON_SIDECAR_POLICY.store(policy, Ordering::Relaxed);
}

/// Whether new metadata writes should additionally emit the JSON sidecar.
///
/// Defaults to `false`; opt-in via [`set_meta_json_sidecar_enabled`] or the
/// `REDDB_META_JSON_SIDECAR` env var (escape hatch for ad-hoc debugging of a
/// non-Max instance). The env var enables the sidecar when its value is one
/// of `1`, `true`, `yes` or `on`, compared ASCII-case-insensitively with
/// surrounding whitespace ignored. Reads always tolerate either JSON or
/// binary.
pub fn meta_json_sidecar_enabled() -> bool {
    match META_JSON_SIDECAR_POLICY.load(Ordering::Relaxed) {
        1 => true,
        2 => false,
        // Unset: fall back to the environment. A missing or non-UTF-8
        // variable counts as "off".
        _ => std::env::var("REDDB_META_JSON_SIDECAR")
            .map(|raw| {
                let flag = raw.trim();
                ["1", "true", "yes", "on"]
                    .iter()
                    .any(|truthy| flag.eq_ignore_ascii_case(truthy))
            })
            .unwrap_or(false),
    }
}
60
// Seq-N catalog journal policy, stored as a tri-state byte:
//   0 = unset (consult env, default off),
//   1 = enabled,
//   2 = disabled.
// Mirrors the meta-json sidecar toggle but governs the
// `<data>.meta.rdbx.seq-{N}` forensic trail emitted on every metadata save.
// Tier wiring (deferred) flips this on for `Max` with retention 32; opt-in
// elsewhere lands with retention 4. See `seqn_journal_retention`.
static SEQN_JOURNAL_POLICY: AtomicU8 = AtomicU8::new(0);
// Retention override. 0 = unset (consult env, default off-tier retention).
static SEQN_JOURNAL_RETENTION: AtomicUsize = AtomicUsize::new(0);

/// Process-wide opt-in for the seq-N catalog journal (`<data>.meta.rdbx.seq-N`
/// snapshot trail).
///
/// Defaults off so non-`Max` tiers don't accumulate forensic artifacts. Tier
/// wiring should call this with `true` for `Max`. Once called (with either
/// value) the stored policy takes precedence over the `REDDB_SEQN_JOURNAL`
/// environment variable. Escape hatch: `REDDB_SEQN_JOURNAL=1`.
pub fn set_seqn_journal_enabled(enabled: bool) {
    let policy: u8 = if enabled { 1 } else { 2 };
    SEQN_JOURNAL_POLICY.store(policy, Ordering::Relaxed);
}

/// Whether new metadata saves should also emit a seq-N journal entry.
///
/// When no policy has been set, the `REDDB_SEQN_JOURNAL` env var enables the
/// journal if its value is one of `1`, `true`, `yes` or `on`, compared
/// ASCII-case-insensitively with surrounding whitespace ignored.
pub fn seqn_journal_enabled() -> bool {
    match SEQN_JOURNAL_POLICY.load(Ordering::Relaxed) {
        1 => true,
        2 => false,
        // Unset: fall back to the environment. A missing or non-UTF-8
        // variable counts as "off".
        _ => std::env::var("REDDB_SEQN_JOURNAL")
            .map(|raw| {
                let flag = raw.trim();
                ["1", "true", "yes", "on"]
                    .iter()
                    .any(|truthy| flag.eq_ignore_ascii_case(truthy))
            })
            .unwrap_or(false),
    }
}
89
// Pager-meta sidecar policy (#477), stored as a tri-state byte:
//   0 = unset (consult env, default off — keep `<data>-meta` shadow),
//   1 = enabled (fold meta into page 1 + overflow chain; no `-meta` sidecar),
//   2 = disabled (current behavior).
// Tier wiring (deferred) flips this on for tiers that prefer a single
// datafile artifact. Escape hatch: `REDDB_FOLD_PAGER_META=1`.
static FOLD_PAGER_META_POLICY: AtomicU8 = AtomicU8::new(0);

/// Process-wide opt-in for folding pager metadata (page 1) into the datafile
/// without an adjacent `<data>-meta` shadow.
///
/// When enabled, the corruption-recovery shadow at `<data>-meta` is not
/// written; readers trust page 1 (plus its overflow chain) as the single
/// source of truth. Defaults off. Once called (with either value) the stored
/// policy takes precedence over the `REDDB_FOLD_PAGER_META` environment
/// variable.
pub fn set_fold_pager_meta_enabled(enabled: bool) {
    let policy: u8 = if enabled { 1 } else { 2 };
    FOLD_PAGER_META_POLICY.store(policy, Ordering::Relaxed);
}

/// Whether the pager should fold metadata into page 1 only and skip the
/// `<data>-meta` sidecar shadow.
///
/// Reads still tolerate the sidecar so existing databases keep working
/// through the flag flip. When no policy has been set, the
/// `REDDB_FOLD_PAGER_META` env var enables folding if its value is one of
/// `1`, `true`, `yes` or `on`, compared ASCII-case-insensitively with
/// surrounding whitespace ignored.
pub fn fold_pager_meta_enabled() -> bool {
    match FOLD_PAGER_META_POLICY.load(Ordering::Relaxed) {
        1 => true,
        2 => false,
        // Unset: fall back to the environment. A missing or non-UTF-8
        // variable counts as "off".
        _ => std::env::var("REDDB_FOLD_PAGER_META")
            .map(|raw| {
                let flag = raw.trim();
                ["1", "true", "yes", "on"]
                    .iter()
                    .any(|truthy| flag.eq_ignore_ascii_case(truthy))
            })
            .unwrap_or(false),
    }
}
118
// Fold-DWB-into-WAL policy (#478), stored as a tri-state byte:
//   0 = unset (consult env, default off — keep `-dwb` sidecar),
//   1 = enabled (emit FullPageImage WAL records before first page
//       modification per checkpoint cycle; no `-dwb` sidecar),
//   2 = disabled.
// Tier wiring (deferred) flips this on for tiers that prefer a single WAL-
// rooted recovery path. Escape hatch: `REDDB_FOLD_DWB_INTO_WAL=1`.
static FOLD_DWB_INTO_WAL_POLICY: AtomicU8 = AtomicU8::new(0);

/// Process-wide opt-in for folding the double-write buffer into the WAL via
/// full-page-image (FPI) records.
///
/// When enabled, the pager does not open or write `<data>-dwb`; recovery
/// rebuilds torn pages from FPI records replayed before normal redo.
/// Defaults off. Once called (with either value) the stored policy takes
/// precedence over the `REDDB_FOLD_DWB_INTO_WAL` environment variable.
pub fn set_fold_dwb_into_wal_enabled(enabled: bool) {
    let policy: u8 = if enabled { 1 } else { 2 };
    FOLD_DWB_INTO_WAL_POLICY.store(policy, Ordering::Relaxed);
}

/// Whether the pager should fold DWB into WAL (no `<data>-dwb` sidecar).
///
/// Reads still tolerate the legacy sidecar so existing databases keep
/// working through the flag flip. When no policy has been set, the
/// `REDDB_FOLD_DWB_INTO_WAL` env var enables folding if its value is one of
/// `1`, `true`, `yes` or `on`, compared ASCII-case-insensitively with
/// surrounding whitespace ignored.
pub fn fold_dwb_into_wal_enabled() -> bool {
    match FOLD_DWB_INTO_WAL_POLICY.load(Ordering::Relaxed) {
        1 => true,
        2 => false,
        // Unset: fall back to the environment. A missing or non-UTF-8
        // variable counts as "off".
        _ => std::env::var("REDDB_FOLD_DWB_INTO_WAL")
            .map(|raw| {
                let flag = raw.trim();
                ["1", "true", "yes", "on"]
                    .iter()
                    .any(|truthy| flag.eq_ignore_ascii_case(truthy))
            })
            .unwrap_or(false),
    }
}
147
148/// Process-wide retention for the seq-N catalog journal. Tier wiring should
149/// call this with `DEFAULT_METADATA_JOURNAL_RETENTION` (32) for `Max` and
150/// `OPT_IN_METADATA_JOURNAL_RETENTION` (4) for opt-in non-`Max` use.
151/// `0` resets to defaults (env or off-tier baseline).
152pub fn set_seqn_journal_retention(retention: usize) {
153    SEQN_JOURNAL_RETENTION.store(retention, Ordering::Relaxed);
154}
155
156/// Resolved retention bound for the seq-N journal. Falls back to env
157/// `REDDB_SEQN_JOURNAL_RETENTION`, then to `OPT_IN_METADATA_JOURNAL_RETENTION`
158/// (4) — the conservative off-tier baseline.
159pub fn seqn_journal_retention() -> usize {
160    let stored = SEQN_JOURNAL_RETENTION.load(Ordering::Relaxed);
161    if stored > 0 {
162        return stored;
163    }
164    std::env::var("REDDB_SEQN_JOURNAL_RETENTION")
165        .ok()
166        .and_then(|v| v.parse::<usize>().ok())
167        .filter(|v| *v > 0)
168        .unwrap_or(OPT_IN_METADATA_JOURNAL_RETENTION)
169}
170
/// Kind of physical metadata source: the binary file, a binary journal
/// entry, or JSON.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PhysicalMetadataSource {
    Binary,
    BinaryJournal,
    Json,
}

impl PhysicalMetadataSource {
    /// Stable lowercase label for this source (`"binary"`,
    /// `"binary_journal"` or `"json"`).
    pub fn as_str(self) -> &'static str {
        match self {
            PhysicalMetadataSource::Json => "json",
            PhysicalMetadataSource::BinaryJournal => "binary_journal",
            PhysicalMetadataSource::Binary => "binary",
        }
    }
}
/// Reference to a block: its index plus a checksum. The default value has
/// both fields set to zero.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct BlockReference {
    /// Index of the referenced block.
    pub index: u64,
    /// Checksum recorded for the referenced block.
    pub checksum: u128,
}
192
193#[derive(Debug, Clone, Default)]
194pub struct ManifestPointers {
195    pub oldest: BlockReference,
196    pub newest: BlockReference,
197}
198
199#[derive(Debug, Clone)]
200pub struct SuperblockHeader {
201    pub format_version: u32,
202    pub sequence: u64,
203    pub copies: u8,
204    pub manifest: ManifestPointers,
205    pub free_set: BlockReference,
206    pub collection_roots: BTreeMap<String, u64>,
207}
208
209impl Default for SuperblockHeader {
210    fn default() -> Self {
211        Self {
212            format_version: crate::api::REDDB_FORMAT_VERSION,
213            sequence: 0,
214            copies: DEFAULT_SUPERBLOCK_COPIES,
215            manifest: ManifestPointers::default(),
216            free_set: BlockReference::default(),
217            collection_roots: BTreeMap::new(),
218        }
219    }
220}
221
/// Kind of change recorded by a [`ManifestEvent`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ManifestEventKind {
    Insert,
    Update,
    Remove,
    Checkpoint,
}
229
230#[derive(Debug, Clone)]
231pub struct ManifestEvent {
232    pub collection: String,
233    pub object_key: String,
234    pub kind: ManifestEventKind,
235    pub block: BlockReference,
236    pub snapshot_min: u64,
237    pub snapshot_max: Option<u64>,
238}
239
/// Compaction strategy selected for a physical layout.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompactionPolicy {
    Incremental,
    Manual,
}
245
/// Write-ahead-log tuning knobs.
#[derive(Clone, Debug)]
pub struct WalPolicy {
    /// Auto-checkpoint threshold in pages; defaults to `1000`.
    pub auto_checkpoint_pages: u32,
    /// Whether to fsync on commit; defaults to `true`.
    pub fsync_on_commit: bool,
    /// Ring buffer size in bytes; defaults to 64 MiB.
    pub ring_buffer_bytes: u64,
}

impl Default for WalPolicy {
    /// Defaults: checkpoint every 1000 pages, fsync on commit, 64 MiB ring
    /// buffer.
    fn default() -> Self {
        const MIB: u64 = 1024 * 1024;
        Self {
            auto_checkpoint_pages: 1_000,
            fsync_on_commit: true,
            ring_buffer_bytes: 64 * MIB,
        }
    }
}
262
263#[derive(Debug, Clone)]
264pub struct GridLayout {
265    pub block_size: usize,
266    pub page_size: usize,
267    pub superblock_copies: u8,
268}
269
270impl Default for GridLayout {
271    fn default() -> Self {
272        Self {
273            block_size: DEFAULT_GRID_BLOCK_SIZE,
274            page_size: DEFAULT_PAGE_SIZE,
275            superblock_copies: DEFAULT_SUPERBLOCK_COPIES,
276        }
277    }
278}
279
280#[derive(Debug, Clone)]
281pub struct PhysicalLayout {
282    pub mode: StorageMode,
283    pub grid: GridLayout,
284    pub wal: WalPolicy,
285    pub compaction: CompactionPolicy,
286}
287
288impl PhysicalLayout {
289    pub fn from_options(options: &RedDBOptions) -> Self {
290        Self {
291            mode: options.mode,
292            grid: GridLayout::default(),
293            wal: WalPolicy {
294                auto_checkpoint_pages: options.auto_checkpoint_pages,
295                ..WalPolicy::default()
296            },
297            compaction: CompactionPolicy::Incremental,
298        }
299    }
300
301    pub fn is_persistent(&self) -> bool {
302        self.mode == StorageMode::Persistent
303    }
304}
305
/// Summary record for one snapshot. All fields default to zero.
#[derive(Clone, Debug, Default)]
pub struct SnapshotDescriptor {
    /// Identifier of the snapshot.
    pub snapshot_id: u64,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Superblock sequence recorded with the snapshot.
    pub superblock_sequence: u64,
    /// Number of collections recorded for the snapshot.
    pub collection_count: usize,
    /// Total number of entities recorded for the snapshot.
    pub total_entities: usize,
}
314
/// How a collection contract came to exist.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ContractOrigin {
    Explicit,
    Implicit,
    Migrated,
}

impl ContractOrigin {
    /// Stable lowercase label for this origin (`"explicit"`, `"implicit"` or
    /// `"migrated"`).
    pub fn as_str(self) -> &'static str {
        match self {
            ContractOrigin::Migrated => "migrated",
            ContractOrigin::Implicit => "implicit",
            ContractOrigin::Explicit => "explicit",
        }
    }
}
331
332#[derive(Debug, Clone)]
333pub struct DeclaredColumnContract {
334    pub name: String,
335    pub data_type: String,
336    pub sql_type: Option<crate::storage::schema::SqlTypeName>,
337    pub not_null: bool,
338    pub default: Option<String>,
339    pub compress: Option<u8>,
340    pub unique: bool,
341    pub primary_key: bool,
342    pub enum_variants: Vec<String>,
343    pub array_element: Option<String>,
344    pub decimal_precision: Option<u8>,
345}
346
347#[derive(Debug, Clone)]
348pub struct CollectionContract {
349    pub name: String,
350    pub declared_model: crate::catalog::CollectionModel,
351    pub schema_mode: crate::catalog::SchemaMode,
352    pub origin: ContractOrigin,
353    pub version: u32,
354    pub created_at_unix_ms: u128,
355    pub updated_at_unix_ms: u128,
356    pub default_ttl_ms: Option<u64>,
357    pub vector_dimension: Option<usize>,
358    pub vector_metric: Option<crate::storage::engine::distance::DistanceMetric>,
359    pub context_index_fields: Vec<String>,
360    pub declared_columns: Vec<DeclaredColumnContract>,
361    pub table_def: Option<crate::storage::schema::TableDef>,
362    /// Enabled by `CREATE TABLE ... WITH timestamps = true`. When true,
363    /// the runtime auto-populates two user-visible columns
364    /// `created_at` + `updated_at` (BIGINT unix-ms) sourced from the
365    /// `UnifiedEntity::created_at/updated_at` fields. `created_at` is
366    /// immutable after insert; `updated_at` is bumped on every mutation.
367    pub timestamps_enabled: bool,
368    /// Enabled by `CREATE TABLE ... WITH context_index = true` (or by
369    /// naming specific `context_index_fields`). When true, every INSERT
370    /// tokenises the row's text fields and populates the global context
371    /// index that backs `SEARCH CONTEXT` / `SEARCH SIMILAR TEXT` / `ASK`
372    /// (RAG). When false (default), inserts skip the tokenisation +
373    /// 3-way RwLock write storm entirely — ~800 ns faster per insert,
374    /// and SEARCH returns empty for this collection.
375    ///
376    /// Opt-in by design: pure OLTP tables (accounts, orders, events)
377    /// pay zero indexing tax; search-oriented tables (articles, docs)
378    /// flip the switch at CREATE time.
379    pub context_index_enabled: bool,
380    /// Metrics collections are backed by time-series storage but carry a
381    /// metrics-specific raw sample retention contract.
382    pub metrics_raw_retention_ms: Option<u64>,
383    /// Metrics rollup tiers declared by `CREATE METRICS ... DOWNSAMPLE`.
384    pub metrics_rollup_policies: Vec<String>,
385    /// Metrics tenant identity source. Defaults to current tenant context and
386    /// can be declared as a stable identity path for future ingestion slices.
387    pub metrics_tenant_identity: Option<String>,
388    /// Metrics namespace identity. v0 starts with a default namespace so
389    /// series identity is namespace-aware before Prometheus ingestion exists.
390    pub metrics_namespace: Option<String>,
391    /// Enabled by `CREATE TABLE ... APPEND ONLY` or `WITH
392    /// (append_only = true)`. When true, the runtime rejects
393    /// `UPDATE` and `DELETE` against this collection at parse time
394    /// with a clear error — the operator's immutability intent
395    /// becomes a first-class catalog fact rather than an RLS-shaped
396    /// approximation. Default `false` so legacy DDL keeps its
397    /// mutable semantics.
398    pub append_only: bool,
399    /// Declarative subscriptions created by `WITH EVENTS`. This is
400    /// metadata only in #291; event emission is wired by the outbox slice.
401    pub subscriptions: Vec<crate::catalog::SubscriptionDescriptor>,
402    /// Analytics views declared by `CREATE GRAPH ... WITH ANALYTICS (...)`.
403    /// Persisted as part of the contract so each enabled `<graph>.<output>`
404    /// virtual view survives restarts and crash recovery (issue #800).
405    pub analytics_config: Vec<crate::catalog::AnalyticsViewDescriptor>,
406    /// `CREATE TIMESERIES ... WITH SESSION_KEY <col>` — the column the
407    /// `SESSIONIZE` operator partitions by when no key is supplied at
408    /// query-time. `None` for non-timeseries collections and for
409    /// timeseries created without the clause. Issue #576 slice 1.
410    pub session_key: Option<String>,
411    /// `CREATE TIMESERIES ... SESSION_GAP <duration>` — the default
412    /// inactivity gap (milliseconds) the `SESSIONIZE` operator uses to
413    /// close a session when no gap is supplied at query-time. `None`
414    /// for non-timeseries collections and for timeseries created
415    /// without the clause. Issue #576 slice 1.
416    pub session_gap_ms: Option<u64>,
417    /// `ALTER COLLECTION ... SET RETENTION <duration>` — declarative
418    /// retention policy in milliseconds. `None` means retention is
419    /// not enforced. Reads filter out rows older than `now -
420    /// retention_duration_ms` by the collection's timestamp column.
421    /// Issue #580 — DeclarativeRetention slice 1.
422    pub retention_duration_ms: Option<u64>,
423}
424
/// Canonical lifecycle states of an artifact.
///
/// State machine transitions:
/// ```text
///   Declared ──► Building ──► Ready ──► Stale ──► RequiresRebuild
///       │            │          │                       │
///       │            ▼          ▼                       │
///       │         Failed    Disabled                    │
///       │            │                                  │
///       └────────────┴──────────────────────────────────┘
///                    (rebuild restarts from Building)
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ArtifactState {
    /// Index declared but never materialized.
    Declared,
    /// Artifact is being built or rebuilt.
    Building,
    /// Artifact is materialized and queryable.
    Ready,
    /// Artifact is explicitly disabled by the operator.
    Disabled,
    /// Underlying data changed; artifact is out of date.
    Stale,
    /// Build or warmup failed; manual intervention may be needed.
    Failed,
    /// Artifact must be rebuilt before it can serve reads.
    RequiresRebuild,
}

impl ArtifactState {
    /// Maps the legacy build-state string stored in physical metadata to a
    /// canonical state.
    ///
    /// `enabled == false` always yields [`ArtifactState::Disabled`],
    /// regardless of `s`. Unrecognised strings map to
    /// [`ArtifactState::Declared`].
    pub fn from_build_state(s: &str, enabled: bool) -> Self {
        if enabled {
            match s {
                "ready" => Self::Ready,
                "stale" => Self::Stale,
                "failed" => Self::Failed,
                "requires_rebuild" | "requires-rebuild" => Self::RequiresRebuild,
                // Several legacy labels all map to `Building`.
                "building"
                | "catalog-derived"
                | "metadata-only"
                | "artifact-published"
                | "registry-loaded" => Self::Building,
                _ => Self::Declared,
            }
        } else {
            Self::Disabled
        }
    }

    /// Canonical string form used for storage and API surfaces.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::RequiresRebuild => "requires_rebuild",
            Self::Failed => "failed",
            Self::Stale => "stale",
            Self::Disabled => "disabled",
            Self::Ready => "ready",
            Self::Building => "building",
            Self::Declared => "declared",
        }
    }

    /// `true` only for [`ArtifactState::Ready`], the one state that is safe
    /// for query reads.
    pub fn is_queryable(&self) -> bool {
        *self == Self::Ready
    }

    /// Whether a rebuild may be started from this state.
    pub fn can_rebuild(&self) -> bool {
        match self {
            Self::Declared | Self::Stale | Self::Failed | Self::RequiresRebuild => true,
            Self::Building | Self::Ready | Self::Disabled => false,
        }
    }

    /// Whether this state signals that the artifact needs attention.
    pub fn needs_attention(&self) -> bool {
        match self {
            Self::Failed | Self::RequiresRebuild | Self::Stale => true,
            Self::Declared | Self::Building | Self::Ready | Self::Disabled => false,
        }
    }
}

impl std::fmt::Display for ArtifactState {
    /// Writes the same text as [`ArtifactState::as_str`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_str())
    }
}
509
510#[derive(Debug, Clone)]
511pub struct PhysicalIndexState {
512    pub name: String,
513    pub kind: IndexKind,
514    pub collection: Option<String>,
515    pub enabled: bool,
516    pub entries: usize,
517    pub estimated_memory_bytes: u64,
518    pub last_refresh_ms: Option<u128>,
519    pub backend: String,
520    pub artifact_kind: Option<String>,
521    pub artifact_root_page: Option<u32>,
522    pub artifact_checksum: Option<u64>,
523    pub build_state: String,
524}
525
526impl PhysicalIndexState {
527    /// Canonical artifact lifecycle state derived from physical state.
528    pub fn artifact_state(&self) -> ArtifactState {
529        ArtifactState::from_build_state(&self.build_state, self.enabled)
530    }
531}
532
/// Descriptor of one named export.
#[derive(Clone, Debug)]
pub struct ExportDescriptor {
    /// Export name.
    pub name: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Snapshot the export is associated with, if any.
    pub snapshot_id: Option<u64>,
    /// Superblock sequence recorded with the export.
    pub superblock_sequence: u64,
    /// Path of the exported data.
    pub data_path: String,
    /// Path of the exported metadata.
    pub metadata_path: String,
    /// Number of collections recorded for the export.
    pub collection_count: usize,
    /// Total number of entities recorded for the export.
    pub total_entities: usize,
}
544
/// Persisted description of a graph projection.
#[derive(Clone, Debug)]
pub struct PhysicalGraphProjection {
    /// Projection name.
    pub name: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
    /// Projection state label.
    pub state: String,
    /// Source label for the projection.
    pub source: String,
    /// Node labels recorded for the projection.
    pub node_labels: Vec<String>,
    /// Node types recorded for the projection.
    pub node_types: Vec<String>,
    /// Edge labels recorded for the projection.
    pub edge_labels: Vec<String>,
    /// Sequence at which the projection was last materialized, if ever.
    pub last_materialized_sequence: Option<u64>,
}
557
/// Persisted description of an analytics job.
#[derive(Clone, Debug)]
pub struct PhysicalAnalyticsJob {
    /// Job identifier.
    pub id: String,
    /// Job kind label.
    pub kind: String,
    /// Job state label.
    pub state: String,
    /// Name of the projection the job refers to, if any.
    pub projection: Option<String>,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
    /// Sequence of the last run, if the job has one recorded.
    pub last_run_sequence: Option<u64>,
    /// Free-form string key/value metadata, ordered by key.
    pub metadata: BTreeMap<String, String>,
}
569
/// Persisted definition of a tree inside a collection.
#[derive(Clone, Debug)]
pub struct PhysicalTreeDefinition {
    /// Collection the tree belongs to.
    pub collection: String,
    /// Tree name.
    pub name: String,
    /// Identifier of the root node.
    pub root_id: u64,
    /// Default maximum number of children.
    pub default_max_children: usize,
    /// Whether children are ordered.
    pub ordered_children: bool,
    /// Ownership label.
    pub ownership: String,
    /// Auto-fix mode label.
    pub auto_fix_mode: String,
    /// Creation time as Unix milliseconds.
    pub created_at_unix_ms: u128,
    /// Last update time as Unix milliseconds.
    pub updated_at_unix_ms: u128,
}
582
/// One persisted hypertable chunk.
///
/// Mirrors `storage::timeseries::ChunkMeta`, flattened for the metadata
/// sidecar so the registry's routing spine survives a restart (issue #866).
/// A chunk is identified by `start_ns` together with the name of the
/// hypertable that owns it.
#[derive(Clone, Debug)]
pub struct PhysicalHypertableChunk {
    /// Chunk start in nanoseconds; part of the chunk's stable identity.
    pub start_ns: u64,
    /// Exclusive chunk end in nanoseconds.
    pub end_ns_exclusive: u64,
    /// Number of rows recorded for the chunk.
    pub row_count: u64,
    /// Minimum timestamp recorded for the chunk, in nanoseconds.
    pub min_ts_ns: u64,
    /// Maximum timestamp recorded for the chunk, in nanoseconds.
    pub max_ts_ns: u64,
    /// Whether the chunk is sealed.
    pub sealed: bool,
    /// Per-chunk TTL override in nanoseconds, if any.
    pub ttl_override_ns: Option<u64>,
}
598
599/// A persisted hypertable spec plus all of its chunks. Stored in the
600/// physical metadata sidecar alongside collection contracts so chunk
601/// bounds / routing / TTL are recovered identically after a restart
602/// — the same durability path the rest of the catalog already uses,
603/// not a parallel one (issue #866).
604#[derive(Debug, Clone)]
605pub struct PhysicalHypertable {
606    pub name: String,
607    pub time_column: String,
608    pub chunk_interval_ns: u64,
609    pub default_ttl_ns: Option<u64>,
610    pub chunks: Vec<PhysicalHypertableChunk>,
611}
612
613#[derive(Debug, Clone)]
614pub struct PhysicalMetadataFile {
615    pub protocol_version: String,
616    pub generated_at_unix_ms: u128,
617    pub last_loaded_from: Option<String>,
618    pub last_healed_at_unix_ms: Option<u128>,
619    pub manifest: SchemaManifest,
620    pub catalog: CatalogSnapshot,
621    pub manifest_events: Vec<ManifestEvent>,
622    pub indexes: Vec<PhysicalIndexState>,
623    pub graph_projections: Vec<PhysicalGraphProjection>,
624    pub analytics_jobs: Vec<PhysicalAnalyticsJob>,
625    pub tree_definitions: Vec<PhysicalTreeDefinition>,
626    pub collection_ttl_defaults_ms: BTreeMap<String, u64>,
627    pub collection_contracts: Vec<CollectionContract>,
628    /// Persisted hypertable chunk spine (issue #866). Empty on legacy
629    /// sidecars written before the feature and for non-hypertable
630    /// databases.
631    pub hypertables: Vec<PhysicalHypertable>,
632    pub exports: Vec<ExportDescriptor>,
633    pub superblock: SuperblockHeader,
634    pub snapshots: Vec<SnapshotDescriptor>,
635}
636
637mod helpers;
638mod json_codec;
639mod metadata_file;
640pub mod shm;
641
642pub use self::shm::{
643    provision_shm, read_shm_header, set_shm_provisioning_enabled, shm_path_for,
644    shm_provisioning_enabled, ShmHandle, ShmHeader, ShmProvisionState, SHM_FILE_SIZE,
645    SHM_HEADER_SIZE, SHM_MAGIC, SHM_VERSION,
646};
647
648use self::helpers::*;
649use self::json_codec::*;