Skip to main content

sqry_core/graph/unified/storage/
segment.rs

//! Per-file segment table mapping `FileId` to contiguous node ranges.
//!
//! `FileSegmentTable` records the `(start_slot, slot_count)` pair for each
//! file's nodes in the `NodeArena`. This is populated during the initial full
//! build's Phase 3 parallel commit and persisted in V10+ snapshots.
//!
//! # Design
//!
//! During the full build pipeline, `phase2_assign_ranges` computes a disjoint
//! `node_range` for each file in each chunk. `FileSegmentTable::record_range`
//! is called once per file after Phase 3 parallel commit to store these ranges.
//!
//! For incremental re-indexing (`reindex_files`), the segment table is used to
//! identify which node slots belong to a file so they can be tombstoned before
//! the file is re-parsed and committed at a new append-only range.
16
17use serde::{Deserialize, Serialize};
18
19use crate::graph::unified::file::id::FileId;
20
21/// Per-file segment in the node arena.
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
23pub struct FileSegment {
24    /// First node slot index in the arena for this file.
25    pub start_slot: u32,
26    /// Number of contiguous node slots assigned to this file.
27    pub slot_count: u32,
28}
29
30impl FileSegment {
31    /// Returns the exclusive end of the slot range.
32    #[inline]
33    #[must_use]
34    pub fn end_slot(&self) -> u32 {
35        self.start_slot.saturating_add(self.slot_count)
36    }
37
38    /// Returns `true` if the segment is empty (no nodes).
39    #[inline]
40    #[must_use]
41    pub fn is_empty(&self) -> bool {
42        self.slot_count == 0
43    }
44}
45
46/// Maps `FileId` to contiguous node ranges in the `NodeArena`.
47///
48/// Indexed by `FileId` (as `u32`). Files without segments have `None` entries.
49/// The table is populated during the initial full build and updated during
50/// incremental re-indexing.
51#[derive(Debug, Clone, Default, Serialize, Deserialize)]
52pub struct FileSegmentTable {
53    /// Segments indexed by `FileId.as_u32()`. `None` entries indicate files
54    /// that have been removed or were never indexed.
55    segments: Vec<Option<FileSegment>>,
56}
57
58impl FileSegmentTable {
59    /// Creates an empty segment table.
60    #[must_use]
61    pub fn new() -> Self {
62        Self {
63            segments: Vec::new(),
64        }
65    }
66
67    /// Creates a table pre-sized for the given number of files.
68    #[must_use]
69    pub fn with_capacity(file_count: usize) -> Self {
70        Self {
71            segments: vec![None; file_count],
72        }
73    }
74
75    /// Records a segment for a file. Grows the table if necessary.
76    pub fn record_range(&mut self, file_id: FileId, start_slot: u32, slot_count: u32) {
77        let idx = file_id.index() as usize;
78        if idx >= self.segments.len() {
79            self.segments.resize(idx + 1, None);
80        }
81        self.segments[idx] = Some(FileSegment {
82            start_slot,
83            slot_count,
84        });
85    }
86
87    /// Returns the segment for a file, or `None` if unknown.
88    #[inline]
89    #[must_use]
90    pub fn get(&self, file_id: FileId) -> Option<&FileSegment> {
91        let idx = file_id.index() as usize;
92        self.segments.get(idx).and_then(|opt| opt.as_ref())
93    }
94
95    /// Removes a file's segment entry (used during incremental re-indexing
96    /// when a file's nodes are tombstoned).
97    pub fn remove(&mut self, file_id: FileId) {
98        let idx = file_id.index() as usize;
99        if idx < self.segments.len() {
100            self.segments[idx] = None;
101        }
102    }
103
104    /// Returns the number of files with recorded segments.
105    #[must_use]
106    pub fn segment_count(&self) -> usize {
107        self.segments.iter().filter(|s| s.is_some()).count()
108    }
109
110    /// Returns the total number of node slots tracked across all segments.
111    #[must_use]
112    pub fn total_slots(&self) -> u64 {
113        self.segments
114            .iter()
115            .filter_map(|s| s.as_ref())
116            .map(|s| u64::from(s.slot_count))
117            .sum()
118    }
119
120    /// Returns an iterator over all `(FileId, &FileSegment)` pairs.
121    ///
122    /// # Panics
123    ///
124    /// Panics if the segment table length exceeds `u32::MAX`, which would make
125    /// the segment position unrepresentable as a [`FileId`].
126    pub fn iter(&self) -> impl Iterator<Item = (FileId, &FileSegment)> + '_ {
127        self.segments.iter().enumerate().filter_map(|(idx, opt)| {
128            opt.as_ref().map(|seg| {
129                (
130                    FileId::new(u32::try_from(idx).expect("file segment index fits u32")),
131                    seg,
132                )
133            })
134        })
135    }
136
137    /// Returns the backing storage length (for diagnostics).
138    #[must_use]
139    pub fn capacity(&self) -> usize {
140        self.segments.len()
141    }
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Recorded ranges can be read back, unrecorded files stay unknown, and
    /// the aggregate counters only reflect recorded files.
    #[test]
    fn record_and_retrieve() {
        let mut table = FileSegmentTable::new();
        table.record_range(FileId::new(0), 0, 10);
        table.record_range(FileId::new(5), 10, 20);

        let seg0 = table.get(FileId::new(0)).unwrap();
        assert_eq!(seg0.start_slot, 0);
        assert_eq!(seg0.slot_count, 10);
        assert_eq!(seg0.end_slot(), 10);

        let seg5 = table.get(FileId::new(5)).unwrap();
        assert_eq!(seg5.start_slot, 10);
        assert_eq!(seg5.slot_count, 20);

        assert!(table.get(FileId::new(3)).is_none());
        assert_eq!(table.segment_count(), 2);
        assert_eq!(table.total_slots(), 30);
    }

    /// Removing a file clears its entry.
    #[test]
    fn remove_segment() {
        let mut table = FileSegmentTable::new();
        table.record_range(FileId::new(1), 0, 5);
        assert!(table.get(FileId::new(1)).is_some());

        table.remove(FileId::new(1));
        assert!(table.get(FileId::new(1)).is_none());
        assert_eq!(table.segment_count(), 0);
    }

    /// Removing a file whose index lies outside the table changes nothing.
    #[test]
    fn remove_out_of_range_is_noop() {
        let mut table = FileSegmentTable::new();
        table.record_range(FileId::new(1), 0, 5);

        table.remove(FileId::new(9));
        assert_eq!(table.capacity(), 2);
        assert_eq!(table.segment_count(), 1);
    }

    /// Recording a file twice keeps only the latest range.
    #[test]
    fn overwrite_segment() {
        let mut table = FileSegmentTable::new();
        table.record_range(FileId::new(1), 0, 5);
        table.record_range(FileId::new(1), 100, 15);

        let seg = table.get(FileId::new(1)).unwrap();
        assert_eq!(seg.start_slot, 100);
        assert_eq!(seg.slot_count, 15);
    }

    /// Iteration yields recorded files in ascending index order and skips gaps.
    #[test]
    fn iter_over_segments() {
        let mut table = FileSegmentTable::new();
        table.record_range(FileId::new(0), 0, 10);
        table.record_range(FileId::new(2), 10, 5);

        let entries: Vec<_> = table.iter().collect();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].0, FileId::new(0));
        assert_eq!(entries[1].0, FileId::new(2));
    }

    /// A pre-sized table has entries but no recorded segments.
    #[test]
    fn with_capacity_prefills_empty_entries() {
        let table = FileSegmentTable::with_capacity(4);
        assert_eq!(table.capacity(), 4);
        assert_eq!(table.segment_count(), 0);
        assert_eq!(table.total_slots(), 0);
        assert!(table.iter().next().is_none());
    }

    /// A zero-length segment is empty and ends where it starts.
    #[test]
    fn empty_segment() {
        let seg = FileSegment {
            start_slot: 0,
            slot_count: 0,
        };
        assert!(seg.is_empty());
        assert_eq!(seg.end_slot(), 0);
    }

    /// `end_slot` clamps to `u32::MAX` instead of overflowing.
    #[test]
    fn end_slot_saturates_on_overflow() {
        let seg = FileSegment {
            start_slot: u32::MAX - 1,
            slot_count: 5,
        };
        assert_eq!(seg.end_slot(), u32::MAX);
    }
}
211}