Skip to main content

sqry_core/graph/unified/storage/
segment.rs

//! Per-file segment table mapping `FileId` to contiguous node ranges.
//!
//! `FileSegmentTable` records the `(start_slot, slot_count)` pair for each
//! file's nodes in the `NodeArena`. This is populated during the initial full
//! build's Phase 3 parallel commit and persisted in V10+ snapshots.
//!
//! # Design
//!
//! During the full build pipeline, `phase2_assign_ranges` computes a disjoint
//! `node_range` for each file in each chunk. `FileSegmentTable::record_range`
//! is called once per file after Phase 3 parallel commit to store these ranges.
//!
//! For incremental re-indexing (`reindex_files`), the segment table is used to
//! identify which node slots belong to a file so they can be tombstoned before
//! the file is re-parsed and committed at a new append-only range.
16
17use serde::{Deserialize, Serialize};
18
19use crate::graph::unified::file::id::FileId;
20
21/// Per-file segment in the node arena.
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
23pub struct FileSegment {
24    /// First node slot index in the arena for this file.
25    pub start_slot: u32,
26    /// Number of contiguous node slots assigned to this file.
27    pub slot_count: u32,
28}
29
30impl FileSegment {
31    /// Returns the exclusive end of the slot range.
32    #[inline]
33    #[must_use]
34    pub fn end_slot(&self) -> u32 {
35        self.start_slot.saturating_add(self.slot_count)
36    }
37
38    /// Returns `true` if the segment is empty (no nodes).
39    #[inline]
40    #[must_use]
41    pub fn is_empty(&self) -> bool {
42        self.slot_count == 0
43    }
44}
45
46/// Maps `FileId` to contiguous node ranges in the `NodeArena`.
47///
48/// Indexed by `FileId` (as `u32`). Files without segments have `None` entries.
49/// The table is populated during the initial full build and updated during
50/// incremental re-indexing.
51#[derive(Debug, Clone, Default, Serialize, Deserialize)]
52pub struct FileSegmentTable {
53    /// Segments indexed by `FileId.as_u32()`. `None` entries indicate files
54    /// that have been removed or were never indexed.
55    segments: Vec<Option<FileSegment>>,
56}
57
58impl FileSegmentTable {
59    /// Creates an empty segment table.
60    #[must_use]
61    pub fn new() -> Self {
62        Self {
63            segments: Vec::new(),
64        }
65    }
66
67    /// Creates a table pre-sized for the given number of files.
68    #[must_use]
69    pub fn with_capacity(file_count: usize) -> Self {
70        Self {
71            segments: vec![None; file_count],
72        }
73    }
74
75    /// Records a segment for a file. Grows the table if necessary.
76    pub fn record_range(&mut self, file_id: FileId, start_slot: u32, slot_count: u32) {
77        let idx = file_id.index() as usize;
78        if idx >= self.segments.len() {
79            self.segments.resize(idx + 1, None);
80        }
81        self.segments[idx] = Some(FileSegment {
82            start_slot,
83            slot_count,
84        });
85    }
86
87    /// Returns the segment for a file, or `None` if unknown.
88    #[inline]
89    #[must_use]
90    pub fn get(&self, file_id: FileId) -> Option<&FileSegment> {
91        let idx = file_id.index() as usize;
92        self.segments.get(idx).and_then(|opt| opt.as_ref())
93    }
94
95    /// Removes a file's segment entry (used during incremental re-indexing
96    /// when a file's nodes are tombstoned).
97    pub fn remove(&mut self, file_id: FileId) {
98        let idx = file_id.index() as usize;
99        if idx < self.segments.len() {
100            self.segments[idx] = None;
101        }
102    }
103
104    /// Returns the number of files with recorded segments.
105    #[must_use]
106    pub fn segment_count(&self) -> usize {
107        self.segments.iter().filter(|s| s.is_some()).count()
108    }
109
110    /// Returns the total number of node slots tracked across all segments.
111    #[must_use]
112    pub fn total_slots(&self) -> u64 {
113        self.segments
114            .iter()
115            .filter_map(|s| s.as_ref())
116            .map(|s| u64::from(s.slot_count))
117            .sum()
118    }
119
120    /// Returns an iterator over all `(FileId, &FileSegment)` pairs.
121    pub fn iter(&self) -> impl Iterator<Item = (FileId, &FileSegment)> + '_ {
122        self.segments
123            .iter()
124            .enumerate()
125            .filter_map(|(idx, opt)| opt.as_ref().map(|seg| (FileId::new(idx as u32), seg)))
126    }
127
128    /// Returns the backing storage length (for diagnostics).
129    #[must_use]
130    pub fn capacity(&self) -> usize {
131        self.segments.len()
132    }
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// Recorded ranges come back unchanged, gaps stay empty, and the
    /// aggregate counters reflect both entries.
    #[test]
    fn record_and_retrieve() {
        let mut table = FileSegmentTable::new();
        table.record_range(FileId::new(0), 0, 10);
        table.record_range(FileId::new(5), 10, 20);

        let first = table.get(FileId::new(0)).copied();
        assert_eq!(
            first,
            Some(FileSegment {
                start_slot: 0,
                slot_count: 10,
            })
        );
        assert_eq!(first.map(|seg| seg.end_slot()), Some(10));

        let sixth = table.get(FileId::new(5)).copied();
        assert_eq!(
            sixth,
            Some(FileSegment {
                start_slot: 10,
                slot_count: 20,
            })
        );

        assert_eq!(table.get(FileId::new(3)), None);
        assert_eq!(table.segment_count(), 2);
        assert_eq!(table.total_slots(), 30);
    }

    /// Removing a file clears its entry and drops it from the count.
    #[test]
    fn remove_segment() {
        let file = FileId::new(1);
        let mut table = FileSegmentTable::new();

        table.record_range(file, 0, 5);
        assert!(table.get(file).is_some());

        table.remove(file);
        assert!(table.get(file).is_none());
        assert_eq!(table.segment_count(), 0);
    }

    /// Recording the same file twice keeps only the latest range.
    #[test]
    fn overwrite_segment() {
        let file = FileId::new(1);
        let mut table = FileSegmentTable::new();
        table.record_range(file, 0, 5);
        table.record_range(file, 100, 15);

        assert_eq!(
            table.get(file).copied(),
            Some(FileSegment {
                start_slot: 100,
                slot_count: 15,
            })
        );
    }

    /// Iteration yields only populated entries, in ascending file order.
    #[test]
    fn iter_over_segments() {
        let mut table = FileSegmentTable::new();
        table.record_range(FileId::new(0), 0, 10);
        table.record_range(FileId::new(2), 10, 5);

        let ids: Vec<FileId> = table.iter().map(|(id, _)| id).collect();
        assert_eq!(ids, vec![FileId::new(0), FileId::new(2)]);
    }

    /// A zero-length segment is empty and ends where it starts.
    #[test]
    fn empty_segment() {
        let seg = FileSegment {
            start_slot: 0,
            slot_count: 0,
        };
        assert!(seg.is_empty());
        assert_eq!(seg.end_slot(), 0);
    }
}