grafeo_common/storage/section.rs
1//! Section types and traits for the `.grafeo` container format.
2//!
3//! A `.grafeo` file is a container of typed sections. Each section holds
4//! one kind of data (LPG nodes, RDF triples, vector indexes, etc.) and
5//! can be independently read, written, checksummed, and mmap'd.
6//!
7//! The [`Section`] trait is the contract between serializers (grafeo-core)
8//! and the container I/O layer (grafeo-storage). Serializers produce opaque
9//! bytes; the container writes them to disk without knowing the contents.
10
11use std::sync::Arc;
12
13use serde::{Deserialize, Serialize};
14
15use crate::memory::buffer::SpillError;
16use crate::storage::page_fetcher::PageFetcher;
17use crate::utils::error::Result;
18
19// ── Section Type ────────────────────────────────────────────────────
20
/// Identifies a section type in the container directory.
///
/// The discriminant is the numeric id of the section type; the directory
/// entry layout documented on [`SectionDirectoryEntry`] stores it as a
/// `u32`, so existing values should be treated as part of the file format.
///
/// Types 1-9 are **data sections** (authoritative, cannot be rebuilt).
/// Types 10 and above are **index sections** (derived, can be rebuilt from
/// data); this is exactly the split implemented by
/// [`SectionType::is_data_section`] and [`SectionType::is_index_section`].
/// Types 20+ are reserved for acceleration structures: `PropertyIndex` (20)
/// is the only one defined here and, like every type >= 10, it is classified
/// as an index section.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[repr(u32)]
#[non_exhaustive]
pub enum SectionType {
    // ── Data sections (discriminant < 10) ──
    /// Schema definitions, index metadata, epoch, configuration.
    Catalog = 1,
    /// LPG nodes, edges, properties, named graphs.
    LpgStore = 2,
    /// RDF triples and named graphs.
    RdfStore = 3,
    /// Columnar CompactStore: read-only base for layered storage.
    CompactStore = 4,
    /// Layered overlay deletion log: ids of base entities the overlay
    /// has deleted but not yet merged. Persists tombstones so that a
    /// previously-deleted base node does not reappear after reload
    /// when the next compact has not yet run.
    OverlayDeletions = 5,

    // ── Index sections (discriminant >= 10) ──
    /// Vector embeddings, HNSW topology, quantization data.
    VectorStore = 10,
    /// BM25 inverted index: term dictionary, postings lists.
    TextIndex = 11,
    /// RDF Ring index: wavelet trees, succinct permutations.
    RdfRing = 12,
    /// Property hash/btree indexes.
    PropertyIndex = 20,
}
53
54impl SectionType {
55 /// Whether this section type holds authoritative data (not rebuildable).
56 #[must_use]
57 pub const fn is_data_section(self) -> bool {
58 (self as u32) < 10
59 }
60
61 /// Whether this section type holds a derived index (rebuildable from data).
62 #[must_use]
63 pub const fn is_index_section(self) -> bool {
64 (self as u32) >= 10
65 }
66}
67
68// ── Section Flags ───────────────────────────────────────────────────
69
/// Flags for a section entry in the container directory.
///
/// On disk the flags occupy a single packed byte; see
/// [`SectionFlags::to_byte`] and [`SectionFlags::from_byte`] for the
/// encoding. The derived `Default` has both flags cleared.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct SectionFlags {
    /// Bit 0 (`0x01`): section is required (older binaries must refuse to
    /// open if unknown).
    /// When false, unknown section types can be safely skipped.
    pub required: bool,
    /// Bit 1 (`0x02`): section data can be mmap'd for zero-copy access.
    pub mmap_able: bool,
}
79
80impl SectionFlags {
81 /// Pack flags into a single byte for on-disk storage.
82 #[must_use]
83 pub const fn to_byte(self) -> u8 {
84 let mut flags = 0u8;
85 if self.required {
86 flags |= 0x01;
87 }
88 if self.mmap_able {
89 flags |= 0x02;
90 }
91 flags
92 }
93
94 /// Unpack flags from a single byte.
95 #[must_use]
96 pub const fn from_byte(byte: u8) -> Self {
97 Self {
98 required: byte & 0x01 != 0,
99 mmap_able: byte & 0x02 != 0,
100 }
101 }
102}
103
104impl SectionType {
105 /// Default flags for this section type.
106 #[must_use]
107 pub const fn default_flags(self) -> SectionFlags {
108 match self {
109 Self::Catalog => SectionFlags {
110 required: true,
111 mmap_able: false,
112 },
113 Self::LpgStore => SectionFlags {
114 required: true,
115 mmap_able: false,
116 },
117 Self::RdfStore => SectionFlags {
118 required: false,
119 mmap_able: false,
120 },
121 Self::CompactStore => SectionFlags {
122 required: true,
123 mmap_able: true,
124 },
125 Self::OverlayDeletions => SectionFlags {
126 // Marked non-required so older readers that don't know about
127 // it can skip rather than refuse to open. Functionally the
128 // section is authoritative for deletion durability, but a
129 // reader that ignores it fails open (deleted base nodes
130 // reappear) rather than failing closed (refuse to open).
131 required: false,
132 mmap_able: false,
133 },
134 Self::VectorStore | Self::TextIndex | Self::RdfRing | Self::PropertyIndex => {
135 SectionFlags {
136 required: false,
137 mmap_able: true,
138 }
139 }
140 }
141 }
142}
143
144// ── Section Directory Entry ─────────────────────────────────────────
145
/// A single entry in the container's section directory.
///
/// Fixed 32-byte layout for on-disk storage (see
/// [`SectionDirectoryEntry::SIZE`]):
///
/// | Offset | Size | Field |
/// |--------|------|-------|
/// | 0      | 4    | `section_type` (u32 LE) |
/// | 4      | 1    | `version` (u8) |
/// | 5      | 1    | `flags` (packed byte) |
/// | 6      | 2    | reserved (zero) |
/// | 8      | 8    | `offset` (u64 LE, byte offset from file start) |
/// | 16     | 8    | `length` (u64 LE, byte length of section data) |
/// | 24     | 4    | `checksum` (u32 LE, CRC-32 of section data) |
/// | 28     | 4    | reserved (zero) |
///
/// NOTE(review): the code that encodes/decodes this byte layout is not in
/// this file; the struct itself only carries the decoded field values.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SectionDirectoryEntry {
    /// Which section type this entry describes.
    pub section_type: SectionType,
    /// Per-section format version (allows independent evolution).
    pub version: u8,
    /// Section flags (required, mmap-able).
    pub flags: SectionFlags,
    /// Byte offset from file start where section data begins.
    pub offset: u64,
    /// Byte length of the section data.
    pub length: u64,
    /// CRC-32 checksum of the section data.
    pub checksum: u32,
}
175
176impl SectionDirectoryEntry {
177 /// Size of a directory entry on disk (fixed 32 bytes).
178 pub const SIZE: usize = 32;
179}
180
181// ── Section Trait ───────────────────────────────────────────────────
182
/// A serializable section for the `.grafeo` container.
///
/// Implemented in `grafeo-core` for each data model (LPG, RDF) and index
/// type (Vector, Text, Ring). The container I/O layer in `grafeo-storage`
/// calls `serialize()` and `deserialize()` without knowing the section internals.
///
/// The unified flush model uses this trait: the engine iterates all sections,
/// serializes dirty ones, and passes the bytes to the container writer.
///
/// Implementors must be `Send + Sync`. Note that [`mark_clean`](Section::mark_clean),
/// [`swap_to_mmap`](Section::swap_to_mmap) and
/// [`reload_to_ram`](Section::reload_to_ram) take `&self`, so any state they
/// change has to live behind interior mutability; only
/// [`deserialize`](Section::deserialize) receives `&mut self`.
pub trait Section: Send + Sync {
    /// The section type identifier.
    fn section_type(&self) -> SectionType;

    /// Per-section format version.
    ///
    /// Defaults to `1`; override when the section's byte format changes.
    fn version(&self) -> u8 {
        1
    }

    /// Serialize section contents to bytes.
    ///
    /// Called by the flush path (checkpoint, eviction, explicit CHECKPOINT).
    /// The returned bytes are opaque to the container writer.
    ///
    /// # Errors
    ///
    /// Returns an error if serialization fails (e.g., encoding error).
    fn serialize(&self) -> Result<Vec<u8>>;

    /// Populate section contents from bytes.
    ///
    /// Called during recovery (loading from container) or reload (mmap to RAM).
    ///
    /// # Errors
    ///
    /// Returns an error if deserialization fails (e.g., corrupt data, version mismatch).
    fn deserialize(&mut self, data: &[u8]) -> Result<()>;

    /// Whether this section has been modified since the last flush.
    fn is_dirty(&self) -> bool;

    /// Mark the section as clean after a successful flush.
    ///
    /// Takes `&self`: implementations that track a dirty flag need interior
    /// mutability (for example an atomic) to clear it here.
    fn mark_clean(&self);

    /// Estimated memory usage of this section in bytes.
    fn memory_usage(&self) -> usize;

    /// Switch to a mmap-backed read mode using bytes from `fetcher`.
    ///
    /// Called by the spill path after the section has been serialized
    /// to a spill file and that file has been memory-mapped. The
    /// `fetcher` lifetime is tied to the `Arc`: the section should
    /// retain the `Arc` for as long as it serves reads from the mmap.
    ///
    /// Implementations use interior mutability to swap their backing
    /// storage. Eager-deserialize sections may decode `fetcher.fetch(0,
    /// fetcher.len())` into a fresh in-memory copy and keep the
    /// `fetcher` alive only for OS page-cache warmth; zero-copy
    /// sections (a future addition) read directly from the fetcher on
    /// demand.
    ///
    /// # Errors
    ///
    /// The default returns [`SpillError::NotSupported`]. Concrete
    /// sections override this to enable spill-to-disk; failures during
    /// the swap should be reported via [`SpillError::IoError`] or
    /// another appropriate variant.
    fn swap_to_mmap(&self, _fetcher: Arc<dyn PageFetcher>) -> std::result::Result<(), SpillError> {
        // Spill support is opt-in: sections that do not override this method
        // report that they cannot be moved to a mmap-backed view.
        Err(SpillError::NotSupported)
    }

    /// Release any mmap-backed view and return to a fully in-memory
    /// representation.
    ///
    /// The default is a no-op that returns `Ok(())` (already in-memory).
    /// Sections that override [`swap_to_mmap`](Section::swap_to_mmap) should
    /// also override this to drop their `Arc<dyn PageFetcher>` and, if
    /// needed, deserialize from a saved buffer.
    ///
    /// # Errors
    ///
    /// Returns a [`SpillError`] if the reload fails (for example,
    /// because the spill file is no longer readable).
    fn reload_to_ram(&self) -> std::result::Result<(), SpillError> {
        Ok(())
    }
}
268
269// ── Tier Override ───────────────────────────────────────────────────
270
/// Controls whether a section stays in RAM, on disk, or is auto-managed.
///
/// The default (`Auto`) lets the [`BufferManager`](crate::memory::buffer::BufferManager)
/// decide based on memory pressure. Power users can pin a section to a
/// specific tier for predictable performance.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum TierOverride {
    /// Memory-first, spill to disk when budget exceeded (default).
    #[default]
    Auto,
    /// Always keep in RAM. Fail with error if insufficient memory.
    ForceRam,
    /// Always use disk (mmap). Minimal RAM footprint.
    ForceDisk,
}

/// Per-section memory configuration.
///
/// Allows power users to cap individual sections or pin them to a tier.
/// Most users leave this at default (all sections auto-managed within the
/// global memory budget).
///
/// The derived `Default` yields `max_ram: None` and `tier: TierOverride::Auto`
/// (the variant marked `#[default]`), which is exactly what the former
/// hand-written `impl Default` produced.
#[derive(Debug, Clone, Default)]
pub struct SectionMemoryConfig {
    /// Hard cap on this section's RAM usage (bytes).
    /// `None` means the section participates in the global budget with no
    /// per-section cap. The BufferManager decides when to spill.
    pub max_ram: Option<usize>,
    /// Storage tier override.
    pub tier: TierOverride,
}
311
#[cfg(test)]
mod tests {
    use std::sync::atomic::{AtomicBool, Ordering};

    use super::*;

    /// Every section type defined in this file, for exhaustive-style checks.
    const ALL_SECTION_TYPES: [SectionType; 9] = [
        SectionType::Catalog,
        SectionType::LpgStore,
        SectionType::RdfStore,
        SectionType::CompactStore,
        SectionType::OverlayDeletions,
        SectionType::VectorStore,
        SectionType::TextIndex,
        SectionType::RdfRing,
        SectionType::PropertyIndex,
    ];

    #[test]
    fn section_type_classification() {
        assert!(SectionType::Catalog.is_data_section());
        assert!(SectionType::LpgStore.is_data_section());
        assert!(SectionType::RdfStore.is_data_section());
        assert!(SectionType::CompactStore.is_data_section());
        assert!(SectionType::OverlayDeletions.is_data_section());
        assert!(!SectionType::VectorStore.is_data_section());

        assert!(!SectionType::Catalog.is_index_section());
        assert!(SectionType::VectorStore.is_index_section());
        assert!(SectionType::TextIndex.is_index_section());
        assert!(SectionType::RdfRing.is_index_section());
        assert!(SectionType::PropertyIndex.is_index_section());
    }

    #[test]
    fn section_type_is_exactly_one_of_data_or_index() {
        // The two predicates partition the discriminant space at 10, so every
        // type must satisfy exactly one of them.
        for section_type in ALL_SECTION_TYPES {
            assert_ne!(
                section_type.is_data_section(),
                section_type.is_index_section(),
                "{section_type:?} must be either a data or an index section"
            );
        }
    }

    #[test]
    fn section_flags_roundtrip() {
        let flags = SectionFlags {
            required: true,
            mmap_able: false,
        };
        assert_eq!(flags.to_byte(), 0x01);
        assert_eq!(SectionFlags::from_byte(0x01), flags);

        let flags = SectionFlags {
            required: false,
            mmap_able: true,
        };
        assert_eq!(flags.to_byte(), 0x02);
        assert_eq!(SectionFlags::from_byte(0x02), flags);

        let flags = SectionFlags {
            required: true,
            mmap_able: true,
        };
        assert_eq!(flags.to_byte(), 0x03);
        assert_eq!(SectionFlags::from_byte(0x03), flags);

        let empty = SectionFlags::default();
        assert_eq!(empty.to_byte(), 0x00);
        assert_eq!(SectionFlags::from_byte(0x00), empty);
    }

    #[test]
    fn default_flags_roundtrip_through_byte_for_every_type() {
        for section_type in ALL_SECTION_TYPES {
            let flags = section_type.default_flags();
            assert_eq!(
                SectionFlags::from_byte(flags.to_byte()),
                flags,
                "{section_type:?} default flags must survive packing"
            );
        }
    }

    #[test]
    fn default_flags_by_type() {
        let catalog = SectionType::Catalog.default_flags();
        assert!(catalog.required);
        assert!(!catalog.mmap_able);

        let vector = SectionType::VectorStore.default_flags();
        assert!(!vector.required);
        assert!(vector.mmap_able);

        let rdf = SectionType::RdfStore.default_flags();
        assert!(!rdf.required);
        // Not a rule for all data sections: CompactStore is a data section
        // and is mmap-able.
        assert!(
            !rdf.mmap_able,
            "RdfStore must be deserialized, not mmap'd"
        );
    }

    #[test]
    fn jules_compact_store_default_flags() {
        // CompactStore is the one data section that is both required and
        // mmap-able.
        let compact = SectionType::CompactStore.default_flags();
        assert!(compact.required);
        assert!(compact.mmap_able);
        assert_eq!(compact.to_byte(), 0x03);
    }

    #[test]
    fn gus_overlay_deletions_default_flags() {
        // OverlayDeletions is a data section that is deliberately optional so
        // that readers unaware of it can skip it.
        let overlay = SectionType::OverlayDeletions.default_flags();
        assert!(!overlay.required);
        assert!(!overlay.mmap_able);
        assert_eq!(overlay.to_byte(), 0x00);
    }

    #[test]
    fn directory_entry_size() {
        assert_eq!(SectionDirectoryEntry::SIZE, 32);
    }

    #[test]
    fn alix_tier_override_variants() {
        assert_eq!(TierOverride::Auto, TierOverride::default());
        // Verify all variants are distinct
        assert_ne!(TierOverride::Auto, TierOverride::ForceRam);
        assert_ne!(TierOverride::Auto, TierOverride::ForceDisk);
        assert_ne!(TierOverride::ForceRam, TierOverride::ForceDisk);
    }

    #[test]
    fn gus_section_memory_config_default() {
        let config = SectionMemoryConfig::default();
        assert!(config.max_ram.is_none());
        assert_eq!(config.tier, TierOverride::Auto);
    }

    #[test]
    fn vincent_section_memory_config_with_cap() {
        let config = SectionMemoryConfig {
            max_ram: Some(1024 * 1024),
            tier: TierOverride::ForceRam,
        };
        assert_eq!(config.max_ram, Some(1024 * 1024));
        assert_eq!(config.tier, TierOverride::ForceRam);
    }

    #[test]
    fn jules_force_disk_tier() {
        let config = SectionMemoryConfig {
            max_ram: None,
            tier: TierOverride::ForceDisk,
        };
        assert_eq!(config.tier, TierOverride::ForceDisk);
    }

    #[test]
    fn mia_lpg_store_default_flags_distinct_from_rdf() {
        let lpg = SectionType::LpgStore.default_flags();
        let rdf = SectionType::RdfStore.default_flags();
        // LpgStore is required, RdfStore is not
        assert!(lpg.required);
        assert!(!rdf.required);
        // These two data sections are deserialized into RAM, not mmap'd
        assert!(!lpg.mmap_able, "LpgStore is a data section, not mmap-able");
        assert!(!rdf.mmap_able, "RdfStore is a data section, not mmap-able");
    }

    #[test]
    fn butch_index_section_default_flags_all_variants() {
        // All index section types share the same flags
        for section_type in [
            SectionType::VectorStore,
            SectionType::TextIndex,
            SectionType::RdfRing,
            SectionType::PropertyIndex,
        ] {
            let flags = section_type.default_flags();
            assert!(!flags.required, "{section_type:?} should not be required");
            assert!(flags.mmap_able, "{section_type:?} should be mmap-able");
        }
    }

    #[test]
    fn django_directory_entry_construction() {
        let entry = SectionDirectoryEntry {
            section_type: SectionType::LpgStore,
            version: 1,
            flags: SectionFlags {
                required: true,
                mmap_able: false,
            },
            offset: 4096,
            length: 8192,
            checksum: 0xDEAD_BEEF,
        };
        assert_eq!(entry.section_type, SectionType::LpgStore);
        assert_eq!(entry.version, 1);
        assert!(entry.flags.required);
        assert!(!entry.flags.mmap_able);
        assert_eq!(entry.offset, 4096);
        assert_eq!(entry.length, 8192);
        assert_eq!(entry.checksum, 0xDEAD_BEEF);
    }

    #[test]
    fn shosanna_section_type_is_data_vs_index_boundary() {
        // Data sections: discriminant < 10
        assert!(SectionType::Catalog.is_data_section());
        assert!(!SectionType::Catalog.is_index_section());

        // Index sections: discriminant >= 10
        assert!(SectionType::VectorStore.is_index_section());
        assert!(!SectionType::VectorStore.is_data_section());

        // PropertyIndex at discriminant 20 is still an index section
        assert!(SectionType::PropertyIndex.is_index_section());
        assert!(!SectionType::PropertyIndex.is_data_section());
    }

    #[test]
    fn hans_section_flags_extra_bits_ignored() {
        // Bits beyond 0 and 1 are ignored by from_byte
        let flags = SectionFlags::from_byte(0xFF);
        assert!(flags.required);
        assert!(flags.mmap_able);

        let flags = SectionFlags::from_byte(0xFC);
        assert!(!flags.required);
        assert!(!flags.mmap_able);
    }

    #[test]
    fn beatrix_directory_entry_clone_eq() {
        let entry = SectionDirectoryEntry {
            section_type: SectionType::RdfRing,
            version: 2,
            flags: SectionFlags {
                required: false,
                mmap_able: true,
            },
            offset: 0,
            length: 1024,
            checksum: 42,
        };
        let cloned = entry.clone();
        assert_eq!(entry, cloned);
    }

    /// Minimal Section trait implementation for testing default methods.
    ///
    /// The dirty flag is atomic because `Section::mark_clean` takes `&self`:
    /// clearing it requires interior mutability, and the trait's
    /// `Send + Sync` bound rules out `Cell`.
    struct StubSection {
        dirty: AtomicBool,
    }

    impl StubSection {
        /// Build a stub whose dirty flag starts at `dirty`.
        fn new(dirty: bool) -> Self {
            Self {
                dirty: AtomicBool::new(dirty),
            }
        }
    }

    impl Section for StubSection {
        fn section_type(&self) -> SectionType {
            SectionType::LpgStore
        }

        fn serialize(&self) -> crate::utils::error::Result<Vec<u8>> {
            Ok(vec![1, 2, 3])
        }

        fn deserialize(&mut self, _data: &[u8]) -> crate::utils::error::Result<()> {
            Ok(())
        }

        fn is_dirty(&self) -> bool {
            self.dirty.load(Ordering::SeqCst)
        }

        fn mark_clean(&self) {
            self.dirty.store(false, Ordering::SeqCst);
        }

        fn memory_usage(&self) -> usize {
            64
        }
    }

    #[test]
    fn mia_section_trait_default_version() {
        let stub = StubSection::new(false);
        // The default version() method returns 1
        assert_eq!(stub.version(), 1);
        assert_eq!(stub.section_type(), SectionType::LpgStore);
        assert!(!stub.is_dirty());
        assert_eq!(stub.memory_usage(), 64);
    }

    #[test]
    fn butch_section_trait_serialize_deserialize() {
        let mut stub = StubSection::new(true);
        assert!(stub.is_dirty());

        let data = stub.serialize().unwrap();
        assert_eq!(data, vec![1, 2, 3]);

        stub.deserialize(&[4, 5, 6]).unwrap();
        // Deserializing does not clear the flag; only mark_clean does.
        assert!(stub.is_dirty());
        stub.mark_clean();
        assert!(!stub.is_dirty());
    }

    #[test]
    fn django_section_trait_default_reload_to_ram() {
        // A section that never overrides the spill hooks is always in memory,
        // so the default reload is a successful no-op.
        let stub = StubSection::new(true);
        assert!(stub.reload_to_ram().is_ok());
        // The default must not touch the dirty state.
        assert!(stub.is_dirty());
    }
}