omendb_core/omen/
graph.rs

1//! Graph section - HNSW neighbor lists
2#![allow(clippy::cast_ptr_alignment)] // mmap data is aligned by our serialization
3
4use memmap2::MmapMut;
5use std::io::{self, Write};
6
7/// Graph section - stores HNSW neighbor lists
8///
9/// Layout:
10/// - Node levels: [u8; count] - max level for each node
11/// - Level 0 offsets: [u32; count] - offset into `level0_neighbors`
12/// - Level 0 neighbors: [u32; ...] - neighbor IDs (varint would be better but simpler for now)
13/// - Upper offsets: [u32; ...] - for nodes with level > 0
14/// - Upper neighbors: [u32; ...] - upper level neighbor IDs
15pub struct GraphSection {
16    count: u64,
17    m: u16, // Max neighbors per layer
18    /// Memory-mapped data
19    data: *const u8,
20    data_len: usize,
21    /// Parsed offsets for fast access
22    level0_neighbors_start: usize,
23    upper_section_start: usize,
24}
25
26// Safety: GraphSection is read-only after creation
27unsafe impl Send for GraphSection {}
28unsafe impl Sync for GraphSection {}
29
30impl GraphSection {
31    /// Create from mmap region
32    ///
33    /// # Safety
34    /// The mmap must remain valid for the lifetime of this section.
35    pub unsafe fn from_mmap(
36        mmap: &MmapMut,
37        offset: usize,
38        length: usize,
39        count: u64,
40        m: u16,
41    ) -> io::Result<Self> {
42        if length == 0 || count == 0 {
43            return Ok(Self {
44                count: 0,
45                m,
46                data: std::ptr::null(),
47                data_len: 0,
48                level0_neighbors_start: 0,
49                upper_section_start: 0,
50            });
51        }
52
53        let ptr = mmap.as_ptr().add(offset);
54
55        // Calculate section boundaries
56        let levels_size = count as usize;
57        let offsets_size = count as usize * 4; // u32 offsets
58        let level0_neighbors_start = levels_size + offsets_size;
59
60        Ok(Self {
61            count,
62            m,
63            data: ptr,
64            data_len: length,
65            level0_neighbors_start,
66            upper_section_start: length, // Will be set properly on load
67        })
68    }
69
70    /// Create empty section for building
71    #[must_use]
72    pub fn new(m: u16) -> Self {
73        Self {
74            count: 0,
75            m,
76            data: std::ptr::null(),
77            data_len: 0,
78            level0_neighbors_start: 0,
79            upper_section_start: 0,
80        }
81    }
82
83    /// Get node level
84    #[inline]
85    #[must_use]
86    pub fn get_level(&self, node_id: u32) -> Option<u8> {
87        if node_id as u64 >= self.count || self.data.is_null() {
88            return None;
89        }
90        // Safety: bounds checked
91        unsafe { Some(*self.data.add(node_id as usize)) }
92    }
93
94    /// Get level 0 neighbors for a node
95    #[inline]
96    #[must_use]
97    pub fn get_neighbors_level0(&self, node_id: u32) -> Option<&[u32]> {
98        if node_id as u64 >= self.count || self.data.is_null() {
99            return None;
100        }
101
102        // Read offset from offsets table (use checked arithmetic to prevent overflow)
103        let offset_pos = (self.count as usize).checked_add((node_id as usize).checked_mul(4)?)?;
104        if offset_pos.checked_add(4)? > self.data_len {
105            return None;
106        }
107
108        let offset = unsafe {
109            let ptr = self.data.add(offset_pos).cast::<u32>();
110            ptr.read_unaligned().to_le() as usize
111        };
112
113        // Read neighbor count (first u32 at offset)
114        let neighbor_start = self.level0_neighbors_start.checked_add(offset)?;
115        if neighbor_start.checked_add(4)? > self.data_len {
116            return None;
117        }
118
119        let neighbor_count = unsafe {
120            let ptr = self.data.add(neighbor_start).cast::<u32>();
121            ptr.read_unaligned().to_le() as usize
122        };
123
124        if neighbor_count == 0 {
125            return Some(&[]);
126        }
127
128        // Safety: bounds should be valid if file is not corrupted
129        let neighbors_ptr = neighbor_start.checked_add(4)?;
130        let neighbors_end = neighbors_ptr.checked_add(neighbor_count.checked_mul(4)?)?;
131        if neighbors_end > self.upper_section_start {
132            return None;
133        }
134
135        unsafe {
136            let ptr = self.data.add(neighbors_ptr).cast::<u32>();
137            Some(std::slice::from_raw_parts(ptr, neighbor_count))
138        }
139    }
140
141    /// Get max neighbors per layer
142    #[must_use]
143    pub fn m(&self) -> u16 {
144        self.m
145    }
146
147    /// Get node count
148    #[must_use]
149    pub fn count(&self) -> u64 {
150        self.count
151    }
152
153    /// Serialize graph to bytes
154    ///
155    /// Format:
156    /// - levels: [u8; count]
157    /// - offsets: [u32; count]
158    /// - neighbors: for each node: [count: u32, `neighbor_ids`: [u32; count]]
159    pub fn write_graph<W: Write>(
160        writer: &mut W,
161        levels: &[u8],
162        neighbors: &[Vec<u32>],
163    ) -> io::Result<()> {
164        let count = levels.len();
165        assert_eq!(count, neighbors.len());
166
167        // Write levels
168        writer.write_all(levels)?;
169
170        // Calculate offsets and write
171        let mut current_offset: u32 = 0;
172        for node_neighbors in neighbors {
173            writer.write_all(&current_offset.to_le_bytes())?;
174            // Each neighbor list: count (u32) + neighbors (u32 * count)
175            current_offset += 4 + (node_neighbors.len() as u32 * 4);
176        }
177
178        // Write neighbor lists
179        for node_neighbors in neighbors {
180            writer.write_all(&(node_neighbors.len() as u32).to_le_bytes())?;
181            for &neighbor in node_neighbors {
182                writer.write_all(&neighbor.to_le_bytes())?;
183            }
184        }
185
186        Ok(())
187    }
188
189    /// Calculate size in bytes for graph
190    #[must_use]
191    pub fn size_for_graph(levels: &[u8], neighbors: &[Vec<u32>]) -> usize {
192        let count = levels.len();
193        let levels_size = count;
194        let offsets_size = count * 4;
195        let neighbors_size: usize = neighbors
196            .iter()
197            .map(|n| 4 + n.len() * 4) // count + neighbors
198            .sum();
199        levels_size + offsets_size + neighbors_size
200    }
201}
202
203#[cfg(test)]
204mod tests {
205    use super::*;
206
207    #[test]
208    fn test_graph_size_calculation() {
209        let levels = vec![0u8; 100];
210        let neighbors: Vec<Vec<u32>> = (0..100).map(|_| vec![1, 2, 3, 4]).collect();
211
212        let size = GraphSection::size_for_graph(&levels, &neighbors);
213        // 100 levels + 100*4 offsets + 100*(4 + 4*4) neighbor lists
214        assert_eq!(size, 100 + 400 + 100 * 20);
215    }
216}