omendb_core/omen/
graph.rs

1//! Graph section - HNSW neighbor lists
2#![allow(clippy::cast_ptr_alignment)] // mmap data is aligned by our serialization
3
4use memmap2::MmapMut;
5use std::io::{self, Write};
6
7/// Graph section - stores HNSW neighbor lists
8///
9/// Layout:
10/// - Node levels: [u8; count] - max level for each node
11/// - Level 0 offsets: [u32; count] - offset into `level0_neighbors`
12/// - Level 0 neighbors: [u32; ...] - neighbor IDs (varint would be better but simpler for now)
13/// - Upper offsets: [u32; ...] - for nodes with level > 0
14/// - Upper neighbors: [u32; ...] - upper level neighbor IDs
15pub struct GraphSection {
16    count: u64,
17    m: u16, // Max neighbors per layer
18    /// Memory-mapped data
19    data: *const u8,
20    data_len: usize,
21    /// Parsed offsets for fast access
22    level0_neighbors_start: usize,
23    upper_section_start: usize,
24}
25
26// Safety: GraphSection is read-only after creation
27unsafe impl Send for GraphSection {}
28unsafe impl Sync for GraphSection {}
29
30impl GraphSection {
31    /// Create from mmap region
32    ///
33    /// # Safety
34    /// The mmap must remain valid for the lifetime of this section.
35    pub unsafe fn from_mmap(
36        mmap: &MmapMut,
37        offset: usize,
38        length: usize,
39        count: u64,
40        m: u16,
41    ) -> io::Result<Self> {
42        if length == 0 || count == 0 {
43            return Ok(Self {
44                count: 0,
45                m,
46                data: std::ptr::null(),
47                data_len: 0,
48                level0_neighbors_start: 0,
49                upper_section_start: 0,
50            });
51        }
52
53        let ptr = mmap.as_ptr().add(offset);
54
55        // Calculate section boundaries
56        let levels_size = count as usize;
57        let offsets_size = count as usize * 4; // u32 offsets
58        let level0_neighbors_start = levels_size + offsets_size;
59
60        Ok(Self {
61            count,
62            m,
63            data: ptr,
64            data_len: length,
65            level0_neighbors_start,
66            upper_section_start: length, // Will be set properly on load
67        })
68    }
69
70    /// Create empty section for building
71    #[must_use]
72    pub fn new(m: u16) -> Self {
73        Self {
74            count: 0,
75            m,
76            data: std::ptr::null(),
77            data_len: 0,
78            level0_neighbors_start: 0,
79            upper_section_start: 0,
80        }
81    }
82
83    /// Get node level
84    #[inline]
85    #[must_use]
86    pub fn get_level(&self, node_id: u32) -> Option<u8> {
87        if node_id as u64 >= self.count || self.data.is_null() {
88            return None;
89        }
90        // Safety: bounds checked
91        unsafe { Some(*self.data.add(node_id as usize)) }
92    }
93
94    /// Get level 0 neighbors for a node
95    #[inline]
96    #[must_use]
97    pub fn get_neighbors_level0(&self, node_id: u32) -> Option<&[u32]> {
98        if node_id as u64 >= self.count || self.data.is_null() {
99            return None;
100        }
101
102        // Read offset from offsets table
103        let offset_pos = self.count as usize + node_id as usize * 4;
104        if offset_pos + 4 > self.data_len {
105            return None;
106        }
107
108        let offset = unsafe {
109            let ptr = self.data.add(offset_pos).cast::<u32>();
110            ptr.read_unaligned().to_le() as usize
111        };
112
113        // Read neighbor count (first u32 at offset)
114        let neighbor_start = self.level0_neighbors_start + offset;
115        if neighbor_start + 4 > self.data_len {
116            return None;
117        }
118
119        let neighbor_count = unsafe {
120            let ptr = self.data.add(neighbor_start).cast::<u32>();
121            ptr.read_unaligned().to_le() as usize
122        };
123
124        if neighbor_count == 0 {
125            return Some(&[]);
126        }
127
128        // Safety: bounds should be valid if file is not corrupted
129        let neighbors_ptr = neighbor_start + 4;
130        if neighbors_ptr + neighbor_count * 4 > self.upper_section_start {
131            return None;
132        }
133
134        unsafe {
135            let ptr = self.data.add(neighbors_ptr).cast::<u32>();
136            Some(std::slice::from_raw_parts(ptr, neighbor_count))
137        }
138    }
139
140    /// Get max neighbors per layer
141    #[must_use]
142    pub fn m(&self) -> u16 {
143        self.m
144    }
145
146    /// Get node count
147    #[must_use]
148    pub fn count(&self) -> u64 {
149        self.count
150    }
151
152    /// Serialize graph to bytes
153    ///
154    /// Format:
155    /// - levels: [u8; count]
156    /// - offsets: [u32; count]
157    /// - neighbors: for each node: [count: u32, `neighbor_ids`: [u32; count]]
158    pub fn write_graph<W: Write>(
159        writer: &mut W,
160        levels: &[u8],
161        neighbors: &[Vec<u32>],
162    ) -> io::Result<()> {
163        let count = levels.len();
164        assert_eq!(count, neighbors.len());
165
166        // Write levels
167        writer.write_all(levels)?;
168
169        // Calculate offsets and write
170        let mut current_offset: u32 = 0;
171        for node_neighbors in neighbors {
172            writer.write_all(&current_offset.to_le_bytes())?;
173            // Each neighbor list: count (u32) + neighbors (u32 * count)
174            current_offset += 4 + (node_neighbors.len() as u32 * 4);
175        }
176
177        // Write neighbor lists
178        for node_neighbors in neighbors {
179            writer.write_all(&(node_neighbors.len() as u32).to_le_bytes())?;
180            for &neighbor in node_neighbors {
181                writer.write_all(&neighbor.to_le_bytes())?;
182            }
183        }
184
185        Ok(())
186    }
187
188    /// Calculate size in bytes for graph
189    #[must_use]
190    pub fn size_for_graph(levels: &[u8], neighbors: &[Vec<u32>]) -> usize {
191        let count = levels.len();
192        let levels_size = count;
193        let offsets_size = count * 4;
194        let neighbors_size: usize = neighbors
195            .iter()
196            .map(|n| 4 + n.len() * 4) // count + neighbors
197            .sum();
198        levels_size + offsets_size + neighbors_size
199    }
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205
206    #[test]
207    fn test_graph_size_calculation() {
208        let levels = vec![0u8; 100];
209        let neighbors: Vec<Vec<u32>> = (0..100).map(|_| vec![1, 2, 3, 4]).collect();
210
211        let size = GraphSection::size_for_graph(&levels, &neighbors);
212        // 100 levels + 100*4 offsets + 100*(4 + 4*4) neighbor lists
213        assert_eq!(size, 100 + 400 + 100 * 20);
214    }
215}