Skip to main content

grafeo_common/storage/
section.rs

1//! Section types and traits for the `.grafeo` container format.
2//!
3//! A `.grafeo` file is a container of typed sections. Each section holds
4//! one kind of data (LPG nodes, RDF triples, vector indexes, etc.) and
5//! can be independently read, written, checksummed, and mmap'd.
6//!
7//! The [`Section`] trait is the contract between serializers (grafeo-core)
8//! and the container I/O layer (grafeo-storage). Serializers produce opaque
9//! bytes; the container writes them to disk without knowing the contents.
10
11use serde::{Deserialize, Serialize};
12
13use crate::utils::error::Result;
14
15// ── Section Type ────────────────────────────────────────────────────
16
/// Identifies a section type in the container directory.
///
/// The enum is `#[repr(u32)]`, matching the width of the `section_type`
/// field (u32 LE) in the on-disk directory entry.
/// NOTE(review): presumably the discriminant is the value written to disk,
/// in which case existing values must never be renumbered — confirm against
/// the container writer.
///
/// Types 1-9 are **data sections** (authoritative, cannot be rebuilt).
/// Types 10 and above are **index sections** (derived, can be rebuilt from
/// data). This is the split implemented by `is_data_section` and
/// `is_index_section`.
///
/// NOTE(review): the range 20+ was originally described as reserved for
/// future acceleration structures, but `PropertyIndex` already uses 20 and
/// is classified as an index section. Confirm which allocation is intended
/// before handing out further discriminants.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[repr(u32)]
#[non_exhaustive]
pub enum SectionType {
    /// Schema definitions, index metadata, epoch, configuration.
    Catalog = 1,
    /// LPG nodes, edges, properties, named graphs.
    LpgStore = 2,
    /// RDF triples and named graphs.
    RdfStore = 3,

    /// Vector embeddings, HNSW topology, quantization data.
    VectorStore = 10,
    /// BM25 inverted index: term dictionary, postings lists.
    TextIndex = 11,
    /// RDF Ring index: wavelet trees, succinct permutations.
    RdfRing = 12,
    /// Property hash/btree indexes.
    PropertyIndex = 20,
}
42
43impl SectionType {
44    /// Whether this section type holds authoritative data (not rebuildable).
45    #[must_use]
46    pub const fn is_data_section(self) -> bool {
47        (self as u32) < 10
48    }
49
50    /// Whether this section type holds a derived index (rebuildable from data).
51    #[must_use]
52    pub const fn is_index_section(self) -> bool {
53        (self as u32) >= 10
54    }
55}
56
57// ── Section Flags ───────────────────────────────────────────────────
58
/// Flags for a section entry in the container directory.
///
/// On disk the flags occupy a single byte; `SectionFlags::to_byte` and
/// `SectionFlags::from_byte` convert between that byte and this struct.
/// The derived `Default` leaves both flags cleared.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct SectionFlags {
    /// Bit 0 (`0x01`): section is required (older binaries must refuse to open if unknown).
    /// When false, unknown section types can be safely skipped.
    pub required: bool,
    /// Bit 1 (`0x02`): section data can be mmap'd for zero-copy access.
    pub mmap_able: bool,
}
68
69impl SectionFlags {
70    /// Pack flags into a single byte for on-disk storage.
71    #[must_use]
72    pub const fn to_byte(self) -> u8 {
73        let mut flags = 0u8;
74        if self.required {
75            flags |= 0x01;
76        }
77        if self.mmap_able {
78            flags |= 0x02;
79        }
80        flags
81    }
82
83    /// Unpack flags from a single byte.
84    #[must_use]
85    pub const fn from_byte(byte: u8) -> Self {
86        Self {
87            required: byte & 0x01 != 0,
88            mmap_able: byte & 0x02 != 0,
89        }
90    }
91}
92
93impl SectionType {
94    /// Default flags for this section type.
95    #[must_use]
96    pub const fn default_flags(self) -> SectionFlags {
97        match self {
98            Self::Catalog => SectionFlags {
99                required: true,
100                mmap_able: false,
101            },
102            Self::LpgStore => SectionFlags {
103                required: true,
104                mmap_able: false,
105            },
106            Self::RdfStore => SectionFlags {
107                required: false,
108                mmap_able: false,
109            },
110            Self::VectorStore | Self::TextIndex | Self::RdfRing | Self::PropertyIndex => {
111                SectionFlags {
112                    required: false,
113                    mmap_able: true,
114                }
115            }
116        }
117    }
118}
119
120// ── Section Directory Entry ─────────────────────────────────────────
121
/// A single entry in the container's section directory.
///
/// Fixed 32-byte layout for on-disk storage:
///
/// | Offset | Size | Field |
/// |--------|------|-------|
/// | 0 | 4 | `section_type` (u32 LE) |
/// | 4 | 1 | `version` (u8) |
/// | 5 | 1 | `flags` (packed byte) |
/// | 6 | 2 | reserved (zero) |
/// | 8 | 8 | `offset` (u64 LE, byte offset from file start) |
/// | 16 | 8 | `length` (u64 LE, byte length of section data) |
/// | 24 | 4 | `checksum` (u32 LE, CRC-32 of section data) |
/// | 28 | 4 | reserved (zero) |
///
/// NOTE(review): this type only derives serde's `Serialize`/`Deserialize`;
/// nothing in this file encodes or decodes the byte layout above.
/// Presumably the container I/O layer does that by hand — confirm, and do
/// not assume a serde format produces these exact 32 bytes.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SectionDirectoryEntry {
    /// Which section type this entry describes.
    pub section_type: SectionType,
    /// Per-section format version (allows independent evolution).
    pub version: u8,
    /// Section flags (required, mmap-able).
    pub flags: SectionFlags,
    /// Byte offset from file start where section data begins.
    pub offset: u64,
    /// Byte length of the section data.
    pub length: u64,
    /// CRC-32 checksum of the section data.
    pub checksum: u32,
}
151
152impl SectionDirectoryEntry {
153    /// Size of a directory entry on disk (fixed 32 bytes).
154    pub const SIZE: usize = 32;
155}
156
157// ── Section Trait ───────────────────────────────────────────────────
158
/// A serializable section for the `.grafeo` container.
///
/// Implemented in `grafeo-core` for each data model (LPG, RDF) and index
/// type (Vector, Text, Ring). The container I/O layer in `grafeo-storage`
/// calls `serialize()` and `deserialize()` without knowing the section internals.
///
/// The unified flush model uses this trait: the engine iterates all sections,
/// serializes dirty ones, and passes the bytes to the container writer.
///
/// Implementors must be `Send + Sync`. Note that [`Section::mark_clean`]
/// takes `&self`, so the dirty state has to live behind interior mutability
/// (for example an atomic flag).
pub trait Section: Send + Sync {
    /// The section type identifier.
    fn section_type(&self) -> SectionType;

    /// Per-section format version.
    ///
    /// Defaults to `1`; override when the section's byte format changes.
    fn version(&self) -> u8 {
        1
    }

    /// Serialize section contents to bytes.
    ///
    /// Called by the flush path (checkpoint, eviction, explicit CHECKPOINT).
    /// The returned bytes are opaque to the container writer.
    ///
    /// # Errors
    ///
    /// Returns an error if serialization fails (e.g., encoding error).
    fn serialize(&self) -> Result<Vec<u8>>;

    /// Populate section contents from bytes.
    ///
    /// Called during recovery (loading from container) or reload (mmap to RAM).
    ///
    /// # Errors
    ///
    /// Returns an error if deserialization fails (e.g., corrupt data, version mismatch).
    fn deserialize(&mut self, data: &[u8]) -> Result<()>;

    /// Whether this section has been modified since the last flush.
    fn is_dirty(&self) -> bool;

    /// Mark the section as clean after a successful flush.
    ///
    /// Takes `&self`, not `&mut self`: implementations need interior
    /// mutability to record the change.
    fn mark_clean(&self);

    /// Estimated memory usage of this section in bytes.
    fn memory_usage(&self) -> usize;
}
204
205// ── Tier Override ───────────────────────────────────────────────────
206
/// Controls whether a section stays in RAM, on disk, or is auto-managed.
///
/// The default (`Auto`) lets the [`BufferManager`](crate::memory::buffer::BufferManager)
/// decide based on memory pressure. Power users can pin a section to a
/// specific tier for predictable performance.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum TierOverride {
    /// Memory-first, spill to disk when budget exceeded (default).
    #[default]
    Auto,
    /// Always keep in RAM. Fail with error if insufficient memory.
    ForceRam,
    /// Always use disk (mmap). Minimal RAM footprint.
    ForceDisk,
}
223
224/// Per-section memory configuration.
225///
226/// Allows power users to cap individual sections or pin them to a tier.
227/// Most users leave this at default (all sections auto-managed within the
228/// global memory budget).
229#[derive(Debug, Clone)]
230pub struct SectionMemoryConfig {
231    /// Hard cap on this section's RAM usage (bytes).
232    /// `None` means the section participates in the global budget with no
233    /// per-section cap. The BufferManager decides when to spill.
234    pub max_ram: Option<usize>,
235    /// Storage tier override.
236    pub tier: TierOverride,
237}
238
239impl Default for SectionMemoryConfig {
240    fn default() -> Self {
241        Self {
242            max_ram: None,
243            tier: TierOverride::Auto,
244        }
245    }
246}
247
#[cfg(test)]
mod tests {
    use std::sync::atomic::{AtomicBool, Ordering};

    use super::*;

    /// Data types (< 10) and index types (>= 10) are classified correctly.
    #[test]
    fn section_type_classification() {
        assert!(SectionType::Catalog.is_data_section());
        assert!(SectionType::LpgStore.is_data_section());
        assert!(SectionType::RdfStore.is_data_section());
        assert!(!SectionType::VectorStore.is_data_section());

        assert!(!SectionType::Catalog.is_index_section());
        assert!(SectionType::VectorStore.is_index_section());
        assert!(SectionType::TextIndex.is_index_section());
        assert!(SectionType::RdfRing.is_index_section());
        assert!(SectionType::PropertyIndex.is_index_section());
    }

    /// Every flag combination survives a `to_byte` / `from_byte` round trip.
    #[test]
    fn section_flags_roundtrip() {
        let flags = SectionFlags {
            required: true,
            mmap_able: false,
        };
        assert_eq!(flags.to_byte(), 0x01);
        assert_eq!(SectionFlags::from_byte(0x01), flags);

        let flags = SectionFlags {
            required: false,
            mmap_able: true,
        };
        assert_eq!(flags.to_byte(), 0x02);
        assert_eq!(SectionFlags::from_byte(0x02), flags);

        let flags = SectionFlags {
            required: true,
            mmap_able: true,
        };
        assert_eq!(flags.to_byte(), 0x03);
        assert_eq!(SectionFlags::from_byte(0x03), flags);

        let empty = SectionFlags::default();
        assert_eq!(empty.to_byte(), 0x00);
        assert_eq!(SectionFlags::from_byte(0x00), empty);
    }

    #[test]
    fn default_flags_by_type() {
        let catalog = SectionType::Catalog.default_flags();
        assert!(catalog.required);
        assert!(!catalog.mmap_able);

        let vector = SectionType::VectorStore.default_flags();
        assert!(!vector.required);
        assert!(vector.mmap_able);

        let rdf = SectionType::RdfStore.default_flags();
        assert!(!rdf.required);
        assert!(
            !rdf.mmap_able,
            "data sections must be deserialized, not mmap'd"
        );
    }

    #[test]
    fn directory_entry_size() {
        assert_eq!(SectionDirectoryEntry::SIZE, 32);
    }

    #[test]
    fn alix_tier_override_variants() {
        assert_eq!(TierOverride::Auto, TierOverride::default());
        // Verify all variants are distinct
        assert_ne!(TierOverride::Auto, TierOverride::ForceRam);
        assert_ne!(TierOverride::Auto, TierOverride::ForceDisk);
        assert_ne!(TierOverride::ForceRam, TierOverride::ForceDisk);
    }

    #[test]
    fn gus_section_memory_config_default() {
        let config = SectionMemoryConfig::default();
        assert!(config.max_ram.is_none());
        assert_eq!(config.tier, TierOverride::Auto);
    }

    #[test]
    fn vincent_section_memory_config_with_cap() {
        let config = SectionMemoryConfig {
            max_ram: Some(1024 * 1024),
            tier: TierOverride::ForceRam,
        };
        assert_eq!(config.max_ram, Some(1024 * 1024));
        assert_eq!(config.tier, TierOverride::ForceRam);
    }

    #[test]
    fn jules_force_disk_tier() {
        let config = SectionMemoryConfig {
            max_ram: None,
            tier: TierOverride::ForceDisk,
        };
        assert_eq!(config.tier, TierOverride::ForceDisk);
    }

    #[test]
    fn mia_lpg_store_default_flags_distinct_from_rdf() {
        let lpg = SectionType::LpgStore.default_flags();
        let rdf = SectionType::RdfStore.default_flags();
        // LpgStore is required, RdfStore is not
        assert!(lpg.required);
        assert!(!rdf.required);
        // Data sections must be deserialized into RAM, not mmap'd
        assert!(!lpg.mmap_able, "LpgStore is a data section, not mmap-able");
        assert!(!rdf.mmap_able, "RdfStore is a data section, not mmap-able");
    }

    #[test]
    fn butch_index_section_default_flags_all_variants() {
        // All index section types share the same flags
        for section_type in [
            SectionType::VectorStore,
            SectionType::TextIndex,
            SectionType::RdfRing,
            SectionType::PropertyIndex,
        ] {
            let flags = section_type.default_flags();
            assert!(!flags.required, "{section_type:?} should not be required");
            assert!(flags.mmap_able, "{section_type:?} should be mmap-able");
        }
    }

    #[test]
    fn django_directory_entry_construction() {
        let entry = SectionDirectoryEntry {
            section_type: SectionType::LpgStore,
            version: 1,
            flags: SectionFlags {
                required: true,
                mmap_able: false,
            },
            offset: 4096,
            length: 8192,
            checksum: 0xDEAD_BEEF,
        };
        assert_eq!(entry.section_type, SectionType::LpgStore);
        assert_eq!(entry.version, 1);
        assert!(entry.flags.required);
        assert!(!entry.flags.mmap_able);
        assert_eq!(entry.offset, 4096);
        assert_eq!(entry.length, 8192);
        assert_eq!(entry.checksum, 0xDEAD_BEEF);
    }

    #[test]
    fn shosanna_section_type_is_data_vs_index_boundary() {
        // Data sections: discriminant < 10
        assert!(SectionType::Catalog.is_data_section());
        assert!(!SectionType::Catalog.is_index_section());

        // Index sections: discriminant >= 10
        assert!(SectionType::VectorStore.is_index_section());
        assert!(!SectionType::VectorStore.is_data_section());

        // PropertyIndex at discriminant 20 is still an index section
        assert!(SectionType::PropertyIndex.is_index_section());
        assert!(!SectionType::PropertyIndex.is_data_section());
    }

    #[test]
    fn hans_section_flags_extra_bits_ignored() {
        // Bits beyond 0 and 1 are ignored by from_byte
        let flags = SectionFlags::from_byte(0xFF);
        assert!(flags.required);
        assert!(flags.mmap_able);

        let flags = SectionFlags::from_byte(0xFC);
        assert!(!flags.required);
        assert!(!flags.mmap_able);
    }

    #[test]
    fn beatrix_directory_entry_clone_eq() {
        let entry = SectionDirectoryEntry {
            section_type: SectionType::RdfRing,
            version: 2,
            flags: SectionFlags {
                required: false,
                mmap_able: true,
            },
            offset: 0,
            length: 1024,
            checksum: 42,
        };
        let cloned = entry.clone();
        assert_eq!(entry, cloned);
    }

    /// Minimal `Section` implementation for exercising the trait.
    ///
    /// The dirty flag is an `AtomicBool` because `mark_clean` takes `&self`
    /// and `Section` requires `Send + Sync`; this lets the stub actually
    /// record the dirty-to-clean transition instead of ignoring it.
    struct StubSection {
        dirty: AtomicBool,
    }

    impl StubSection {
        /// Build a stub whose dirty flag starts at `dirty`.
        fn new(dirty: bool) -> Self {
            Self {
                dirty: AtomicBool::new(dirty),
            }
        }
    }

    impl Section for StubSection {
        fn section_type(&self) -> SectionType {
            SectionType::LpgStore
        }

        fn serialize(&self) -> crate::utils::error::Result<Vec<u8>> {
            Ok(vec![1, 2, 3])
        }

        fn deserialize(&mut self, _data: &[u8]) -> crate::utils::error::Result<()> {
            Ok(())
        }

        fn is_dirty(&self) -> bool {
            self.dirty.load(Ordering::SeqCst)
        }

        fn mark_clean(&self) {
            self.dirty.store(false, Ordering::SeqCst);
        }

        fn memory_usage(&self) -> usize {
            64
        }
    }

    #[test]
    fn mia_section_trait_default_version() {
        let stub = StubSection::new(false);
        // The default version() method returns 1
        assert_eq!(stub.version(), 1);
        assert_eq!(stub.section_type(), SectionType::LpgStore);
        assert!(!stub.is_dirty());
        assert_eq!(stub.memory_usage(), 64);
    }

    #[test]
    fn butch_section_trait_serialize_deserialize() {
        let mut stub = StubSection::new(true);
        assert!(stub.is_dirty());

        let data = stub.serialize().unwrap();
        assert_eq!(data, vec![1, 2, 3]);

        stub.deserialize(&[4, 5, 6]).unwrap();
        // Neither serialize nor deserialize clears the flag in this stub.
        assert!(stub.is_dirty());

        stub.mark_clean();
        assert!(!stub.is_dirty(), "mark_clean must clear the dirty flag");
    }
}