Skip to main content

grafeo_common/storage/
section.rs

1//! Section types and traits for the `.grafeo` container format.
2//!
3//! A `.grafeo` file is a container of typed sections. Each section holds
4//! one kind of data (LPG nodes, RDF triples, vector indexes, etc.) and
5//! can be independently read, written, checksummed, and mmap'd.
6//!
7//! The [`Section`] trait is the contract between serializers (grafeo-core)
8//! and the container I/O layer (grafeo-storage). Serializers produce opaque
9//! bytes; the container writes them to disk without knowing the contents.
10
11use std::sync::Arc;
12
13use serde::{Deserialize, Serialize};
14
15use crate::memory::buffer::SpillError;
16use crate::storage::page_fetcher::PageFetcher;
17use crate::utils::error::Result;
18
19// ── Section Type ────────────────────────────────────────────────────
20
21/// Identifies a section type in the container directory.
22///
23/// Types 1-9 are **data sections** (authoritative, cannot be rebuilt).
24/// Types 10-19 are **index sections** (derived, can be rebuilt from data).
25/// Types 20+ are reserved for future acceleration structures.
26#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
27#[repr(u32)]
28#[non_exhaustive]
29pub enum SectionType {
30    /// Schema definitions, index metadata, epoch, configuration.
31    Catalog = 1,
32    /// LPG nodes, edges, properties, named graphs.
33    LpgStore = 2,
34    /// RDF triples and named graphs.
35    RdfStore = 3,
36    /// Columnar CompactStore: read-only base for layered storage.
37    CompactStore = 4,
38    /// Layered overlay deletion log: ids of base entities the overlay
39    /// has deleted but not yet merged. Persists tombstones so that a
40    /// previously-deleted base node does not reappear after reload
41    /// when the next compact has not yet run.
42    OverlayDeletions = 5,
43
44    /// Vector embeddings, HNSW topology, quantization data.
45    VectorStore = 10,
46    /// BM25 inverted index: term dictionary, postings lists.
47    TextIndex = 11,
48    /// RDF Ring index: wavelet trees, succinct permutations.
49    RdfRing = 12,
50    /// Property hash/btree indexes.
51    PropertyIndex = 20,
52}
53
54impl SectionType {
55    /// Whether this section type holds authoritative data (not rebuildable).
56    #[must_use]
57    pub const fn is_data_section(self) -> bool {
58        (self as u32) < 10
59    }
60
61    /// Whether this section type holds a derived index (rebuildable from data).
62    #[must_use]
63    pub const fn is_index_section(self) -> bool {
64        (self as u32) >= 10
65    }
66}
67
68// ── Section Flags ───────────────────────────────────────────────────
69
70/// Flags for a section entry in the container directory.
71#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
72pub struct SectionFlags {
73    /// Bit 0: section is required (older binaries must refuse to open if unknown).
74    /// When false, unknown section types can be safely skipped.
75    pub required: bool,
76    /// Bit 1: section data can be mmap'd for zero-copy access.
77    pub mmap_able: bool,
78}
79
80impl SectionFlags {
81    /// Pack flags into a single byte for on-disk storage.
82    #[must_use]
83    pub const fn to_byte(self) -> u8 {
84        let mut flags = 0u8;
85        if self.required {
86            flags |= 0x01;
87        }
88        if self.mmap_able {
89            flags |= 0x02;
90        }
91        flags
92    }
93
94    /// Unpack flags from a single byte.
95    #[must_use]
96    pub const fn from_byte(byte: u8) -> Self {
97        Self {
98            required: byte & 0x01 != 0,
99            mmap_able: byte & 0x02 != 0,
100        }
101    }
102}
103
104impl SectionType {
105    /// Default flags for this section type.
106    #[must_use]
107    pub const fn default_flags(self) -> SectionFlags {
108        match self {
109            Self::Catalog => SectionFlags {
110                required: true,
111                mmap_able: false,
112            },
113            Self::LpgStore => SectionFlags {
114                required: true,
115                mmap_able: false,
116            },
117            Self::RdfStore => SectionFlags {
118                required: false,
119                mmap_able: false,
120            },
121            Self::CompactStore => SectionFlags {
122                required: true,
123                mmap_able: true,
124            },
125            Self::OverlayDeletions => SectionFlags {
126                // Marked non-required so older readers that don't know about
127                // it can skip rather than refuse to open. Functionally the
128                // section is authoritative for deletion durability, but a
129                // reader that ignores it fails open (deleted base nodes
130                // reappear) rather than failing closed (refuse to open).
131                required: false,
132                mmap_able: false,
133            },
134            Self::VectorStore | Self::TextIndex | Self::RdfRing | Self::PropertyIndex => {
135                SectionFlags {
136                    required: false,
137                    mmap_able: true,
138                }
139            }
140        }
141    }
142}
143
144// ── Section Directory Entry ─────────────────────────────────────────
145
146/// A single entry in the container's section directory.
147///
148/// Fixed 32-byte layout for on-disk storage:
149///
150/// | Offset | Size | Field |
151/// |--------|------|-------|
152/// | 0 | 4 | `section_type` (u32 LE) |
153/// | 4 | 1 | `version` (u8) |
154/// | 5 | 1 | `flags` (packed byte) |
155/// | 6 | 2 | reserved (zero) |
156/// | 8 | 8 | `offset` (u64 LE, byte offset from file start) |
157/// | 16 | 8 | `length` (u64 LE, byte length of section data) |
158/// | 24 | 4 | `checksum` (u32 LE, CRC-32 of section data) |
159/// | 28 | 4 | reserved (zero) |
160#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
161pub struct SectionDirectoryEntry {
162    /// Which section type this entry describes.
163    pub section_type: SectionType,
164    /// Per-section format version (allows independent evolution).
165    pub version: u8,
166    /// Section flags (required, mmap-able).
167    pub flags: SectionFlags,
168    /// Byte offset from file start where section data begins.
169    pub offset: u64,
170    /// Byte length of the section data.
171    pub length: u64,
172    /// CRC-32 checksum of the section data.
173    pub checksum: u32,
174}
175
176impl SectionDirectoryEntry {
177    /// Size of a directory entry on disk (fixed 32 bytes).
178    pub const SIZE: usize = 32;
179}
180
181// ── Section Trait ───────────────────────────────────────────────────
182
183/// A serializable section for the `.grafeo` container.
184///
185/// Implemented in `grafeo-core` for each data model (LPG, RDF) and index
186/// type (Vector, Text, Ring). The container I/O layer in `grafeo-storage`
187/// calls `serialize()` and `deserialize()` without knowing the section internals.
188///
189/// The unified flush model uses this trait: the engine iterates all sections,
190/// serializes dirty ones, and passes the bytes to the container writer.
191pub trait Section: Send + Sync {
192    /// The section type identifier.
193    fn section_type(&self) -> SectionType;
194
195    /// Per-section format version.
196    fn version(&self) -> u8 {
197        1
198    }
199
200    /// Serialize section contents to bytes.
201    ///
202    /// Called by the flush path (checkpoint, eviction, explicit CHECKPOINT).
203    /// The returned bytes are opaque to the container writer.
204    ///
205    /// # Errors
206    ///
207    /// Returns an error if serialization fails (e.g., encoding error).
208    fn serialize(&self) -> Result<Vec<u8>>;
209
210    /// Populate section contents from bytes.
211    ///
212    /// Called during recovery (loading from container) or reload (mmap to RAM).
213    ///
214    /// # Errors
215    ///
216    /// Returns an error if deserialization fails (e.g., corrupt data, version mismatch).
217    fn deserialize(&mut self, data: &[u8]) -> Result<()>;
218
219    /// Whether this section has been modified since the last flush.
220    fn is_dirty(&self) -> bool;
221
222    /// Mark the section as clean after a successful flush.
223    fn mark_clean(&self);
224
225    /// Estimated memory usage of this section in bytes.
226    fn memory_usage(&self) -> usize;
227
228    /// Switch to a mmap-backed read mode using bytes from `fetcher`.
229    ///
230    /// Called by the spill path after the section has been serialized
231    /// to a spill file and that file has been memory-mapped. The
232    /// `fetcher` lifetime is tied to the `Arc`: the section should
233    /// retain the `Arc` for as long as it serves reads from the mmap.
234    ///
235    /// Implementations use interior mutability to swap their backing
236    /// storage. Eager-deserialize sections may decode `fetcher.fetch(0,
237    /// fetcher.len())` into a fresh in-memory copy and keep the
238    /// `fetcher` alive only for OS page-cache warmth; zero-copy
239    /// sections (a future addition) read directly from the fetcher on
240    /// demand.
241    ///
242    /// # Errors
243    ///
244    /// The default returns [`SpillError::NotSupported`]. Concrete
245    /// sections override this to enable spill-to-disk; failures during
246    /// the swap should be reported via [`SpillError::IoError`] or
247    /// another appropriate variant.
248    fn swap_to_mmap(&self, _fetcher: Arc<dyn PageFetcher>) -> std::result::Result<(), SpillError> {
249        Err(SpillError::NotSupported)
250    }
251
252    /// Release any mmap-backed view and return to a fully in-memory
253    /// representation.
254    ///
255    /// The default is a no-op (already in-memory). Sections that
256    /// override [`swap_to_mmap`](Section::swap_to_mmap) should also
257    /// override this to drop their `Arc<dyn PageFetcher>` and, if
258    /// needed, deserialize from a saved buffer.
259    ///
260    /// # Errors
261    ///
262    /// Returns a [`SpillError`] if the reload fails (for example,
263    /// because the spill file is no longer readable).
264    fn reload_to_ram(&self) -> std::result::Result<(), SpillError> {
265        Ok(())
266    }
267}
268
269// ── Tier Override ───────────────────────────────────────────────────
270
/// Selects the storage tier for a section: RAM, disk, or automatic.
///
/// With the default, [`Auto`](Self::Auto), the
/// [`BufferManager`](crate::memory::buffer::BufferManager) decides based on
/// memory pressure. Power users who need predictable performance can pin a
/// section to a specific tier instead.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum TierOverride {
    /// Default. Memory-first; spills to disk when the budget is exceeded.
    #[default]
    Auto,
    /// Always keep in RAM. Fails with an error if memory is insufficient.
    ForceRam,
    /// Always use disk (mmap), keeping the RAM footprint minimal.
    ForceDisk,
}
287
288/// Per-section memory configuration.
289///
290/// Allows power users to cap individual sections or pin them to a tier.
291/// Most users leave this at default (all sections auto-managed within the
292/// global memory budget).
293#[derive(Debug, Clone)]
294pub struct SectionMemoryConfig {
295    /// Hard cap on this section's RAM usage (bytes).
296    /// `None` means the section participates in the global budget with no
297    /// per-section cap. The BufferManager decides when to spill.
298    pub max_ram: Option<usize>,
299    /// Storage tier override.
300    pub tier: TierOverride,
301}
302
303impl Default for SectionMemoryConfig {
304    fn default() -> Self {
305        Self {
306            max_ram: None,
307            tier: TierOverride::Auto,
308        }
309    }
310}
311
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a [`SectionFlags`] from its two booleans.
    fn make_flags(required: bool, mmap_able: bool) -> SectionFlags {
        SectionFlags {
            required,
            mmap_able,
        }
    }

    #[test]
    fn section_type_classification() {
        for data in [
            SectionType::Catalog,
            SectionType::LpgStore,
            SectionType::RdfStore,
        ] {
            assert!(data.is_data_section());
        }
        assert!(!SectionType::VectorStore.is_data_section());

        assert!(!SectionType::Catalog.is_index_section());
        for index in [
            SectionType::VectorStore,
            SectionType::TextIndex,
            SectionType::RdfRing,
            SectionType::PropertyIndex,
        ] {
            assert!(index.is_index_section());
        }
    }

    #[test]
    fn section_flags_roundtrip() {
        // (required, mmap_able, expected on-disk byte)
        let cases = [
            (true, false, 0x01u8),
            (false, true, 0x02),
            (true, true, 0x03),
        ];
        for (required, mmap_able, byte) in cases {
            let expected = make_flags(required, mmap_able);
            assert_eq!(expected.to_byte(), byte);
            assert_eq!(SectionFlags::from_byte(byte), expected);
        }

        let empty = SectionFlags::default();
        assert_eq!(empty.to_byte(), 0x00);
        assert_eq!(SectionFlags::from_byte(0x00), empty);
    }

    #[test]
    fn default_flags_by_type() {
        let SectionFlags {
            required,
            mmap_able,
        } = SectionType::Catalog.default_flags();
        assert!(required);
        assert!(!mmap_able);

        let SectionFlags {
            required,
            mmap_able,
        } = SectionType::VectorStore.default_flags();
        assert!(!required);
        assert!(mmap_able);

        let SectionFlags {
            required,
            mmap_able,
        } = SectionType::RdfStore.default_flags();
        assert!(!required);
        assert!(
            !mmap_able,
            "data sections must be deserialized, not mmap'd"
        );
    }

    #[test]
    fn directory_entry_size() {
        let size = SectionDirectoryEntry::SIZE;
        assert_eq!(size, 32);
    }

    #[test]
    fn alix_tier_override_variants() {
        assert_eq!(TierOverride::Auto, TierOverride::default());
        // Every unordered pair of variants must compare unequal.
        let variants = [
            TierOverride::Auto,
            TierOverride::ForceRam,
            TierOverride::ForceDisk,
        ];
        for (idx, left) in variants.iter().enumerate() {
            for right in &variants[idx + 1..] {
                assert_ne!(left, right);
            }
        }
    }

    #[test]
    fn gus_section_memory_config_default() {
        let SectionMemoryConfig { max_ram, tier } = SectionMemoryConfig::default();
        assert!(max_ram.is_none());
        assert_eq!(tier, TierOverride::Auto);
    }

    #[test]
    fn vincent_section_memory_config_with_cap() {
        let one_mib = 1024 * 1024;
        let SectionMemoryConfig { max_ram, tier } = SectionMemoryConfig {
            max_ram: Some(one_mib),
            tier: TierOverride::ForceRam,
        };
        assert_eq!(max_ram, Some(one_mib));
        assert_eq!(tier, TierOverride::ForceRam);
    }

    #[test]
    fn jules_force_disk_tier() {
        let SectionMemoryConfig { tier, .. } = SectionMemoryConfig {
            max_ram: None,
            tier: TierOverride::ForceDisk,
        };
        assert_eq!(tier, TierOverride::ForceDisk);
    }

    #[test]
    fn mia_lpg_store_default_flags_distinct_from_rdf() {
        let (lpg, rdf) = (
            SectionType::LpgStore.default_flags(),
            SectionType::RdfStore.default_flags(),
        );
        // Only LpgStore is required.
        assert!(lpg.required);
        assert!(!rdf.required);
        // Neither data section is mmap-able; both are deserialized into RAM.
        assert!(!lpg.mmap_able, "LpgStore is a data section, not mmap-able");
        assert!(!rdf.mmap_able, "RdfStore is a data section, not mmap-able");
    }

    #[test]
    fn butch_index_section_default_flags_all_variants() {
        // Every index section type has the same default flags.
        let index_types = [
            SectionType::VectorStore,
            SectionType::TextIndex,
            SectionType::RdfRing,
            SectionType::PropertyIndex,
        ];
        for section_type in index_types {
            let SectionFlags {
                required,
                mmap_able,
            } = section_type.default_flags();
            assert!(!required, "{section_type:?} should not be required");
            assert!(mmap_able, "{section_type:?} should be mmap-able");
        }
    }

    #[test]
    fn django_directory_entry_construction() {
        let entry = SectionDirectoryEntry {
            section_type: SectionType::LpgStore,
            version: 1,
            flags: make_flags(true, false),
            offset: 4096,
            length: 8192,
            checksum: 0xDEAD_BEEF,
        };
        let SectionDirectoryEntry {
            section_type,
            version,
            flags,
            offset,
            length,
            checksum,
        } = entry;
        assert_eq!(section_type, SectionType::LpgStore);
        assert_eq!(version, 1);
        assert!(flags.required);
        assert!(!flags.mmap_able);
        assert_eq!(offset, 4096);
        assert_eq!(length, 8192);
        assert_eq!(checksum, 0xDEAD_BEEF);
    }

    #[test]
    fn shosanna_section_type_is_data_vs_index_boundary() {
        // Discriminants below 10 are data sections; 10 and above are index
        // sections, including PropertyIndex at 20.
        let expectations = [
            (SectionType::Catalog, false),
            (SectionType::VectorStore, true),
            (SectionType::PropertyIndex, true),
        ];
        for (section_type, expect_index) in expectations {
            assert_eq!(section_type.is_index_section(), expect_index);
            assert_eq!(section_type.is_data_section(), !expect_index);
        }
    }

    #[test]
    fn hans_section_flags_extra_bits_ignored() {
        // from_byte only looks at bits 0 and 1.
        let all_bits = SectionFlags::from_byte(0xFF);
        assert!(all_bits.required);
        assert!(all_bits.mmap_able);

        let high_bits_only = SectionFlags::from_byte(0xFC);
        assert!(!high_bits_only.required);
        assert!(!high_bits_only.mmap_able);
    }

    #[test]
    fn beatrix_directory_entry_clone_eq() {
        let original = SectionDirectoryEntry {
            section_type: SectionType::RdfRing,
            version: 2,
            flags: make_flags(false, true),
            offset: 0,
            length: 1024,
            checksum: 42,
        };
        assert_eq!(original, original.clone());
    }

    /// Bare-bones [`Section`] implementor used to exercise the trait's
    /// default methods.
    struct StubSection {
        dirty: bool,
    }

    impl Section for StubSection {
        fn section_type(&self) -> SectionType {
            SectionType::LpgStore
        }

        fn serialize(&self) -> crate::utils::error::Result<Vec<u8>> {
            let payload = vec![1, 2, 3];
            Ok(payload)
        }

        fn deserialize(&mut self, _data: &[u8]) -> crate::utils::error::Result<()> {
            Ok(())
        }

        fn is_dirty(&self) -> bool {
            self.dirty
        }

        fn mark_clean(&self) {}

        fn memory_usage(&self) -> usize {
            64
        }
    }

    #[test]
    fn mia_section_trait_default_version() {
        let section = StubSection { dirty: false };
        // version() is not overridden, so the trait default of 1 applies.
        assert_eq!(section.version(), 1);
        assert_eq!(section.section_type(), SectionType::LpgStore);
        assert!(!section.is_dirty());
        assert_eq!(section.memory_usage(), 64);
    }

    #[test]
    fn butch_section_trait_serialize_deserialize() {
        let mut section = StubSection { dirty: true };
        assert!(section.is_dirty());

        let bytes = section.serialize().unwrap();
        assert_eq!(bytes, vec![1, 2, 3]);

        section.deserialize(&[4, 5, 6]).unwrap();
        section.mark_clean();
    }
}
559}