Skip to main content

grafeo_common/storage/
section.rs

1//! Section types and traits for the `.grafeo` container format.
2//!
3//! A `.grafeo` file is a container of typed sections. Each section holds
4//! one kind of data (LPG nodes, RDF triples, vector indexes, etc.) and
5//! can be independently read, written, checksummed, and mmap'd.
6//!
7//! The [`Section`] trait is the contract between serializers (grafeo-core)
8//! and the container I/O layer (grafeo-storage). Serializers produce opaque
9//! bytes; the container writes them to disk without knowing the contents.
10
11use serde::{Deserialize, Serialize};
12
13use crate::utils::error::Result;
14
15// ── Section Type ────────────────────────────────────────────────────
16
17/// Identifies a section type in the container directory.
18///
19/// Types 1-9 are **data sections** (authoritative, cannot be rebuilt).
20/// Types 10-19 are **index sections** (derived, can be rebuilt from data).
21/// Types 20+ are reserved for future acceleration structures.
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
23#[repr(u32)]
24#[non_exhaustive]
25pub enum SectionType {
26    /// Schema definitions, index metadata, epoch, configuration.
27    Catalog = 1,
28    /// LPG nodes, edges, properties, named graphs.
29    LpgStore = 2,
30    /// RDF triples and named graphs.
31    RdfStore = 3,
32    /// Columnar CompactStore: read-only base for layered storage.
33    CompactStore = 4,
34
35    /// Vector embeddings, HNSW topology, quantization data.
36    VectorStore = 10,
37    /// BM25 inverted index: term dictionary, postings lists.
38    TextIndex = 11,
39    /// RDF Ring index: wavelet trees, succinct permutations.
40    RdfRing = 12,
41    /// Property hash/btree indexes.
42    PropertyIndex = 20,
43}
44
45impl SectionType {
46    /// Whether this section type holds authoritative data (not rebuildable).
47    #[must_use]
48    pub const fn is_data_section(self) -> bool {
49        (self as u32) < 10
50    }
51
52    /// Whether this section type holds a derived index (rebuildable from data).
53    #[must_use]
54    pub const fn is_index_section(self) -> bool {
55        (self as u32) >= 10
56    }
57}
58
59// ── Section Flags ───────────────────────────────────────────────────
60
61/// Flags for a section entry in the container directory.
62#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
63pub struct SectionFlags {
64    /// Bit 0: section is required (older binaries must refuse to open if unknown).
65    /// When false, unknown section types can be safely skipped.
66    pub required: bool,
67    /// Bit 1: section data can be mmap'd for zero-copy access.
68    pub mmap_able: bool,
69}
70
71impl SectionFlags {
72    /// Pack flags into a single byte for on-disk storage.
73    #[must_use]
74    pub const fn to_byte(self) -> u8 {
75        let mut flags = 0u8;
76        if self.required {
77            flags |= 0x01;
78        }
79        if self.mmap_able {
80            flags |= 0x02;
81        }
82        flags
83    }
84
85    /// Unpack flags from a single byte.
86    #[must_use]
87    pub const fn from_byte(byte: u8) -> Self {
88        Self {
89            required: byte & 0x01 != 0,
90            mmap_able: byte & 0x02 != 0,
91        }
92    }
93}
94
95impl SectionType {
96    /// Default flags for this section type.
97    #[must_use]
98    pub const fn default_flags(self) -> SectionFlags {
99        match self {
100            Self::Catalog => SectionFlags {
101                required: true,
102                mmap_able: false,
103            },
104            Self::LpgStore => SectionFlags {
105                required: true,
106                mmap_able: false,
107            },
108            Self::RdfStore => SectionFlags {
109                required: false,
110                mmap_able: false,
111            },
112            Self::CompactStore => SectionFlags {
113                required: true,
114                mmap_able: true,
115            },
116            Self::VectorStore | Self::TextIndex | Self::RdfRing | Self::PropertyIndex => {
117                SectionFlags {
118                    required: false,
119                    mmap_able: true,
120                }
121            }
122        }
123    }
124}
125
126// ── Section Directory Entry ─────────────────────────────────────────
127
128/// A single entry in the container's section directory.
129///
130/// Fixed 32-byte layout for on-disk storage:
131///
132/// | Offset | Size | Field |
133/// |--------|------|-------|
134/// | 0 | 4 | `section_type` (u32 LE) |
135/// | 4 | 1 | `version` (u8) |
136/// | 5 | 1 | `flags` (packed byte) |
137/// | 6 | 2 | reserved (zero) |
138/// | 8 | 8 | `offset` (u64 LE, byte offset from file start) |
139/// | 16 | 8 | `length` (u64 LE, byte length of section data) |
140/// | 24 | 4 | `checksum` (u32 LE, CRC-32 of section data) |
141/// | 28 | 4 | reserved (zero) |
142#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
143pub struct SectionDirectoryEntry {
144    /// Which section type this entry describes.
145    pub section_type: SectionType,
146    /// Per-section format version (allows independent evolution).
147    pub version: u8,
148    /// Section flags (required, mmap-able).
149    pub flags: SectionFlags,
150    /// Byte offset from file start where section data begins.
151    pub offset: u64,
152    /// Byte length of the section data.
153    pub length: u64,
154    /// CRC-32 checksum of the section data.
155    pub checksum: u32,
156}
157
158impl SectionDirectoryEntry {
159    /// Size of a directory entry on disk (fixed 32 bytes).
160    pub const SIZE: usize = 32;
161}
162
163// ── Section Trait ───────────────────────────────────────────────────
164
165/// A serializable section for the `.grafeo` container.
166///
167/// Implemented in `grafeo-core` for each data model (LPG, RDF) and index
168/// type (Vector, Text, Ring). The container I/O layer in `grafeo-storage`
169/// calls `serialize()` and `deserialize()` without knowing the section internals.
170///
171/// The unified flush model uses this trait: the engine iterates all sections,
172/// serializes dirty ones, and passes the bytes to the container writer.
173pub trait Section: Send + Sync {
174    /// The section type identifier.
175    fn section_type(&self) -> SectionType;
176
177    /// Per-section format version.
178    fn version(&self) -> u8 {
179        1
180    }
181
182    /// Serialize section contents to bytes.
183    ///
184    /// Called by the flush path (checkpoint, eviction, explicit CHECKPOINT).
185    /// The returned bytes are opaque to the container writer.
186    ///
187    /// # Errors
188    ///
189    /// Returns an error if serialization fails (e.g., encoding error).
190    fn serialize(&self) -> Result<Vec<u8>>;
191
192    /// Populate section contents from bytes.
193    ///
194    /// Called during recovery (loading from container) or reload (mmap to RAM).
195    ///
196    /// # Errors
197    ///
198    /// Returns an error if deserialization fails (e.g., corrupt data, version mismatch).
199    fn deserialize(&mut self, data: &[u8]) -> Result<()>;
200
201    /// Whether this section has been modified since the last flush.
202    fn is_dirty(&self) -> bool;
203
204    /// Mark the section as clean after a successful flush.
205    fn mark_clean(&self);
206
207    /// Estimated memory usage of this section in bytes.
208    fn memory_usage(&self) -> usize;
209}
210
211// ── Tier Override ───────────────────────────────────────────────────
212
/// Selects the storage tier of a section: RAM, disk, or automatic.
///
/// With the default, `Auto`, the
/// [`BufferManager`](crate::memory::buffer::BufferManager) chooses based on
/// memory pressure. Power users can pin a section to one tier when they need
/// predictable performance.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum TierOverride {
    /// Default: memory first, spilling to disk once the budget is exceeded.
    #[default]
    Auto,
    /// Keep the section in RAM at all times; insufficient memory is an error.
    ForceRam,
    /// Keep the section on disk (mmap) at all times, for a minimal RAM
    /// footprint.
    ForceDisk,
}
229
230/// Per-section memory configuration.
231///
232/// Allows power users to cap individual sections or pin them to a tier.
233/// Most users leave this at default (all sections auto-managed within the
234/// global memory budget).
235#[derive(Debug, Clone)]
236pub struct SectionMemoryConfig {
237    /// Hard cap on this section's RAM usage (bytes).
238    /// `None` means the section participates in the global budget with no
239    /// per-section cap. The BufferManager decides when to spill.
240    pub max_ram: Option<usize>,
241    /// Storage tier override.
242    pub tier: TierOverride,
243}
244
245impl Default for SectionMemoryConfig {
246    fn default() -> Self {
247        Self {
248            max_ram: None,
249            tier: TierOverride::Auto,
250        }
251    }
252}
253
#[cfg(test)]
mod tests {
    use std::sync::atomic::{AtomicBool, Ordering};

    use super::*;

    #[test]
    fn section_type_classification() {
        assert!(SectionType::Catalog.is_data_section());
        assert!(SectionType::LpgStore.is_data_section());
        assert!(SectionType::RdfStore.is_data_section());
        assert!(SectionType::CompactStore.is_data_section());
        assert!(!SectionType::VectorStore.is_data_section());

        assert!(!SectionType::Catalog.is_index_section());
        assert!(!SectionType::CompactStore.is_index_section());
        assert!(SectionType::VectorStore.is_index_section());
        assert!(SectionType::TextIndex.is_index_section());
        assert!(SectionType::RdfRing.is_index_section());
        assert!(SectionType::PropertyIndex.is_index_section());
    }

    #[test]
    fn section_flags_roundtrip() {
        let flags = SectionFlags {
            required: true,
            mmap_able: false,
        };
        assert_eq!(flags.to_byte(), 0x01);
        assert_eq!(SectionFlags::from_byte(0x01), flags);

        let flags = SectionFlags {
            required: false,
            mmap_able: true,
        };
        assert_eq!(flags.to_byte(), 0x02);
        assert_eq!(SectionFlags::from_byte(0x02), flags);

        let flags = SectionFlags {
            required: true,
            mmap_able: true,
        };
        assert_eq!(flags.to_byte(), 0x03);
        assert_eq!(SectionFlags::from_byte(0x03), flags);

        let empty = SectionFlags::default();
        assert_eq!(empty.to_byte(), 0x00);
        assert_eq!(SectionFlags::from_byte(0x00), empty);
    }

    #[test]
    fn default_flags_by_type() {
        let catalog = SectionType::Catalog.default_flags();
        assert!(catalog.required);
        assert!(!catalog.mmap_able);

        let vector = SectionType::VectorStore.default_flags();
        assert!(!vector.required);
        assert!(vector.mmap_able);

        let rdf = SectionType::RdfStore.default_flags();
        assert!(!rdf.required);
        assert!(!rdf.mmap_able, "RdfStore must be deserialized, not mmap'd");

        // CompactStore is the one data section that is both required and
        // mmap-able; it previously had no coverage at all.
        let compact = SectionType::CompactStore.default_flags();
        assert!(compact.required);
        assert!(compact.mmap_able);
    }

    #[test]
    fn directory_entry_size() {
        assert_eq!(SectionDirectoryEntry::SIZE, 32);
    }

    #[test]
    fn alix_tier_override_variants() {
        assert_eq!(TierOverride::Auto, TierOverride::default());
        // Verify all variants are distinct
        assert_ne!(TierOverride::Auto, TierOverride::ForceRam);
        assert_ne!(TierOverride::Auto, TierOverride::ForceDisk);
        assert_ne!(TierOverride::ForceRam, TierOverride::ForceDisk);
    }

    #[test]
    fn gus_section_memory_config_default() {
        let config = SectionMemoryConfig::default();
        assert!(config.max_ram.is_none());
        assert_eq!(config.tier, TierOverride::Auto);
    }

    #[test]
    fn vincent_section_memory_config_with_cap() {
        let config = SectionMemoryConfig {
            max_ram: Some(1024 * 1024),
            tier: TierOverride::ForceRam,
        };
        assert_eq!(config.max_ram, Some(1024 * 1024));
        assert_eq!(config.tier, TierOverride::ForceRam);
    }

    #[test]
    fn jules_force_disk_tier() {
        let config = SectionMemoryConfig {
            max_ram: None,
            tier: TierOverride::ForceDisk,
        };
        assert_eq!(config.tier, TierOverride::ForceDisk);
    }

    #[test]
    fn mia_lpg_store_default_flags_distinct_from_rdf() {
        let lpg = SectionType::LpgStore.default_flags();
        let rdf = SectionType::RdfStore.default_flags();
        // LpgStore is required, RdfStore is not
        assert!(lpg.required);
        assert!(!rdf.required);
        // Neither of these two data sections is mmap-able by default
        assert!(!lpg.mmap_able, "LpgStore is a data section, not mmap-able");
        assert!(!rdf.mmap_able, "RdfStore is a data section, not mmap-able");
    }

    #[test]
    fn butch_index_section_default_flags_all_variants() {
        // All index section types share the same flags
        for section_type in [
            SectionType::VectorStore,
            SectionType::TextIndex,
            SectionType::RdfRing,
            SectionType::PropertyIndex,
        ] {
            let flags = section_type.default_flags();
            assert!(!flags.required, "{section_type:?} should not be required");
            assert!(flags.mmap_able, "{section_type:?} should be mmap-able");
        }
    }

    #[test]
    fn django_directory_entry_construction() {
        let entry = SectionDirectoryEntry {
            section_type: SectionType::LpgStore,
            version: 1,
            flags: SectionFlags {
                required: true,
                mmap_able: false,
            },
            offset: 4096,
            length: 8192,
            checksum: 0xDEAD_BEEF,
        };
        assert_eq!(entry.section_type, SectionType::LpgStore);
        assert_eq!(entry.version, 1);
        assert!(entry.flags.required);
        assert!(!entry.flags.mmap_able);
        assert_eq!(entry.offset, 4096);
        assert_eq!(entry.length, 8192);
        assert_eq!(entry.checksum, 0xDEAD_BEEF);
    }

    #[test]
    fn shosanna_section_type_is_data_vs_index_boundary() {
        // Data sections: discriminant < 10
        assert!(SectionType::Catalog.is_data_section());
        assert!(!SectionType::Catalog.is_index_section());

        // Index sections: discriminant >= 10
        assert!(SectionType::VectorStore.is_index_section());
        assert!(!SectionType::VectorStore.is_data_section());

        // PropertyIndex at discriminant 20 is still an index section
        assert!(SectionType::PropertyIndex.is_index_section());
        assert!(!SectionType::PropertyIndex.is_data_section());
    }

    #[test]
    fn hans_section_flags_extra_bits_ignored() {
        // Bits beyond 0 and 1 are ignored by from_byte
        let flags = SectionFlags::from_byte(0xFF);
        assert!(flags.required);
        assert!(flags.mmap_able);

        let flags = SectionFlags::from_byte(0xFC);
        assert!(!flags.required);
        assert!(!flags.mmap_able);

        // Exhaustively: decoding then re-encoding keeps only the low two bits.
        for byte in 0..=u8::MAX {
            assert_eq!(SectionFlags::from_byte(byte).to_byte(), byte & 0x03);
        }
    }

    #[test]
    fn beatrix_directory_entry_clone_eq() {
        let entry = SectionDirectoryEntry {
            section_type: SectionType::RdfRing,
            version: 2,
            flags: SectionFlags {
                required: false,
                mmap_able: true,
            },
            offset: 0,
            length: 1024,
            checksum: 42,
        };
        let cloned = entry.clone();
        assert_eq!(entry, cloned);
    }

    /// Minimal `Section` implementation for exercising the trait.
    ///
    /// The dirty flag is an `AtomicBool` because `mark_clean` takes `&self`
    /// and `Section` requires `Send + Sync`; this lets the tests observe the
    /// dirty-to-clean transition instead of calling a no-op.
    struct StubSection {
        dirty: AtomicBool,
    }

    impl StubSection {
        /// Builds a stub whose dirty flag starts at `dirty`.
        fn new(dirty: bool) -> Self {
            Self {
                dirty: AtomicBool::new(dirty),
            }
        }
    }

    impl Section for StubSection {
        fn section_type(&self) -> SectionType {
            SectionType::LpgStore
        }

        fn serialize(&self) -> crate::utils::error::Result<Vec<u8>> {
            Ok(vec![1, 2, 3])
        }

        fn deserialize(&mut self, _data: &[u8]) -> crate::utils::error::Result<()> {
            Ok(())
        }

        fn is_dirty(&self) -> bool {
            self.dirty.load(Ordering::SeqCst)
        }

        fn mark_clean(&self) {
            self.dirty.store(false, Ordering::SeqCst);
        }

        fn memory_usage(&self) -> usize {
            64
        }
    }

    #[test]
    fn mia_section_trait_default_version() {
        let stub = StubSection::new(false);
        // The default version() method returns 1
        assert_eq!(stub.version(), 1);
        assert_eq!(stub.section_type(), SectionType::LpgStore);
        assert!(!stub.is_dirty());
        assert_eq!(stub.memory_usage(), 64);
    }

    #[test]
    fn butch_section_trait_serialize_deserialize() {
        let mut stub = StubSection::new(true);
        assert!(stub.is_dirty());

        let data = stub.serialize().unwrap();
        assert_eq!(data, vec![1, 2, 3]);

        stub.deserialize(&[4, 5, 6]).unwrap();
        // Neither serialize nor deserialize clears the stub's flag.
        assert!(stub.is_dirty());

        stub.mark_clean();
        assert!(!stub.is_dirty(), "mark_clean must clear the dirty flag");
    }
}