claw_vector/store/
mmap.rs1use std::{
3 fs::OpenOptions,
4 path::{Path, PathBuf},
5};
6
7use byteorder::{ByteOrder, LittleEndian};
8use memmap2::{MmapMut, MmapOptions};
9
10use crate::error::{VectorError, VectorResult};
11
/// Size in bytes of the fixed file header: magic (8) + version (4) +
/// dimensions (4) + element count (8) + reserved padding (40).
const HEADER_SIZE: usize = 8 + 4 + 4 + 8 + 40;

/// Byte signature stored at the very start of every vector file.
const MAGIC: &[u8; 8] = b"CLAWVEC1";

/// The only on-disk format version this module writes and accepts.
const VERSION: u32 = 1;
15
/// In-memory copy of the fixed-size header stored at the start of a vector file.
///
/// The fields are serialized by hand (little-endian integers at fixed byte
/// offsets), so the in-memory layout of this struct does not affect the file.
/// `PartialEq`/`Eq` are derived so two headers can be compared directly, for
/// example after a create/reopen round trip.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VecFileHeader {
    /// File signature, stored in bytes `0..8`.
    pub magic: [u8; 8],
    /// Format version, stored in bytes `8..12`.
    pub version: u32,
    /// Number of `f32` components per vector, stored in bytes `12..16`.
    pub dimensions: u32,
    /// One past the highest slot index written so far, stored in bytes `16..24`.
    pub element_count: u64,
    /// Padding that fills the rest of the header, stored in bytes `24..64`.
    pub reserved: [u8; 40],
}
30
31pub struct MmapVectorFile {
33 pub mmap: MmapMut,
35 pub header: VecFileHeader,
37 pub path: PathBuf,
39}
40
41impl MmapVectorFile {
42 pub fn create(path: &Path, dimensions: usize, max_elements: usize) -> VectorResult<Self> {
44 if dimensions == 0 {
45 return Err(VectorError::Config(
46 "mmap vector file dimensions must be greater than zero".into(),
47 ));
48 }
49 if max_elements == 0 {
50 return Err(VectorError::Config(
51 "mmap vector file max_elements must be greater than zero".into(),
52 ));
53 }
54 if let Some(parent) = path.parent() {
55 std::fs::create_dir_all(parent)?;
56 }
57
58 let file_size = HEADER_SIZE + max_elements * dimensions * std::mem::size_of::<f32>();
59 let file = OpenOptions::new()
60 .read(true)
61 .write(true)
62 .create(true)
63 .truncate(true)
64 .open(path)?;
65 file.set_len(file_size as u64)?;
66
67 let mmap = map_mut(&file)?;
68 let header = VecFileHeader {
69 magic: *MAGIC,
70 version: VERSION,
71 dimensions: dimensions as u32,
72 element_count: 0,
73 reserved: [0; 40],
74 };
75
76 let mut file = MmapVectorFile {
77 mmap,
78 header,
79 path: path.to_path_buf(),
80 };
81 file.sync_header();
82 file.flush()?;
83 Ok(file)
84 }
85
86 pub fn open(path: &Path) -> VectorResult<Self> {
88 let file = OpenOptions::new().read(true).write(true).open(path)?;
89 let metadata = file.metadata()?;
90 if metadata.len() < HEADER_SIZE as u64 {
91 return Err(VectorError::Index(format!(
92 "mmap vector file '{}' is too small to contain a header",
93 path.display()
94 )));
95 }
96
97 let mmap = map_mut(&file)?;
98 let header = read_header(&mmap)?;
99 Ok(MmapVectorFile {
100 mmap,
101 header,
102 path: path.to_path_buf(),
103 })
104 }
105
106 pub fn write_vector(&mut self, internal_id: usize, vector: &[f32]) -> VectorResult<()> {
108 let dimensions = self.dimensions();
109 if vector.len() != dimensions {
110 return Err(VectorError::DimensionMismatch {
111 expected: dimensions,
112 got: vector.len(),
113 });
114 }
115
116 let offset = self.vector_offset(internal_id)?;
117 let byte_len = std::mem::size_of_val(vector);
118 LittleEndian::write_f32_into(vector, &mut self.mmap[offset..offset + byte_len]);
119
120 let next_count = internal_id as u64 + 1;
121 if next_count > self.header.element_count {
122 self.header.element_count = next_count;
123 self.sync_header();
124 }
125
126 Ok(())
127 }
128
129 pub fn read_vector(&self, internal_id: usize) -> VectorResult<Vec<f32>> {
131 if internal_id >= self.element_count() {
132 return Err(VectorError::NotFound {
133 entity: "vector".into(),
134 id: internal_id.to_string(),
135 });
136 }
137
138 let offset = self.vector_offset(internal_id)?;
139 let byte_len = self.dimensions() * std::mem::size_of::<f32>();
140 let mut vector = vec![0.0f32; self.dimensions()];
141 LittleEndian::read_f32_into(&self.mmap[offset..offset + byte_len], &mut vector);
142 Ok(vector)
143 }
144
145 pub fn delete_vector(&mut self, internal_id: usize) -> VectorResult<()> {
147 let offset = self.vector_offset(internal_id)?;
148 let byte_len = self.dimensions() * std::mem::size_of::<f32>();
149 self.mmap[offset..offset + byte_len].fill(0);
150 Ok(())
151 }
152
153 pub fn flush(&self) -> VectorResult<()> {
155 self.mmap.flush()?;
156 Ok(())
157 }
158
159 pub fn element_count(&self) -> usize {
161 self.header.element_count as usize
162 }
163
164 pub fn dimensions(&self) -> usize {
166 self.header.dimensions as usize
167 }
168
169 pub fn file_size_bytes(&self) -> u64 {
171 self.mmap.len() as u64
172 }
173
174 fn sync_header(&mut self) {
175 self.mmap[..8].copy_from_slice(&self.header.magic);
176 LittleEndian::write_u32(&mut self.mmap[8..12], self.header.version);
177 LittleEndian::write_u32(&mut self.mmap[12..16], self.header.dimensions);
178 LittleEndian::write_u64(&mut self.mmap[16..24], self.header.element_count);
179 self.mmap[24..HEADER_SIZE].copy_from_slice(&self.header.reserved);
180 }
181
182 fn vector_offset(&self, internal_id: usize) -> VectorResult<usize> {
183 let bytes_per_vector = self.dimensions() * std::mem::size_of::<f32>();
184 let offset = HEADER_SIZE + internal_id * bytes_per_vector;
185 let end = offset + bytes_per_vector;
186 if end > self.mmap.len() {
187 return Err(VectorError::Index(format!(
188 "vector slot {internal_id} exceeds mmap file capacity for '{}'",
189 self.path.display()
190 )));
191 }
192 Ok(offset)
193 }
194}
195
196fn read_header(mmap: &[u8]) -> VectorResult<VecFileHeader> {
197 let mut magic = [0u8; 8];
198 magic.copy_from_slice(&mmap[..8]);
199 if &magic != MAGIC {
200 return Err(VectorError::Index("invalid mmap vector file magic".into()));
201 }
202
203 let version = LittleEndian::read_u32(&mmap[8..12]);
204 if version != VERSION {
205 return Err(VectorError::Index(format!(
206 "unsupported mmap vector file version {version}"
207 )));
208 }
209
210 let dimensions = LittleEndian::read_u32(&mmap[12..16]);
211 if dimensions == 0 {
212 return Err(VectorError::Index(
213 "mmap vector file dimensions must be greater than zero".into(),
214 ));
215 }
216
217 let element_count = LittleEndian::read_u64(&mmap[16..24]);
218 let mut reserved = [0u8; 40];
219 reserved.copy_from_slice(&mmap[24..HEADER_SIZE]);
220
221 Ok(VecFileHeader {
222 magic,
223 version,
224 dimensions,
225 element_count,
226 reserved,
227 })
228}
229
230fn map_mut(file: &std::fs::File) -> VectorResult<MmapMut> {
231 unsafe { MmapOptions::new().map_mut(file).map_err(Into::into) }
234}
235
#[cfg(test)]
mod tests {
    use tempfile::tempdir;

    use super::MmapVectorFile;

    /// Vectors written to non-contiguous slots read back unchanged, and the
    /// element count becomes the highest written slot plus one.
    #[test]
    fn create_write_read_round_trip() {
        let scratch = tempdir().unwrap();
        let target = scratch.path().join("vectors.bin");
        let mut store = MmapVectorFile::create(&target, 3, 8).unwrap();

        store.write_vector(0, &[1.0, 2.0, 3.0]).unwrap();
        store.write_vector(3, &[4.0, 5.0, 6.0]).unwrap();
        store.flush().unwrap();

        let first = store.read_vector(0).unwrap();
        let fourth = store.read_vector(3).unwrap();
        assert_eq!(first, vec![1.0, 2.0, 3.0]);
        assert_eq!(fourth, vec![4.0, 5.0, 6.0]);
        assert_eq!(store.element_count(), 4);
    }

    /// Deleting a slot leaves it readable as an all-zero vector.
    #[test]
    fn delete_vector_zeros_slot() {
        let scratch = tempdir().unwrap();
        let target = scratch.path().join("vectors.bin");
        let mut store = MmapVectorFile::create(&target, 2, 4).unwrap();

        store.write_vector(1, &[7.0, 9.0]).unwrap();
        store.delete_vector(1).unwrap();

        let cleared = store.read_vector(1).unwrap();
        assert_eq!(cleared, vec![0.0, 0.0]);
    }

    /// A file written, flushed, and dropped can be reopened with the same
    /// dimensions, element count, and vector contents.
    #[test]
    fn open_restores_header_and_data() {
        let scratch = tempdir().unwrap();
        let target = scratch.path().join("vectors.bin");
        {
            // Scope the writer so its mapping is dropped before reopening.
            let mut writer = MmapVectorFile::create(&target, 2, 4).unwrap();
            writer.write_vector(2, &[3.5, 8.5]).unwrap();
            writer.flush().unwrap();
        }

        let reader = MmapVectorFile::open(&target).unwrap();
        assert_eq!(reader.dimensions(), 2);
        assert_eq!(reader.element_count(), 3);
        assert_eq!(reader.read_vector(2).unwrap(), vec![3.5, 8.5]);
    }
}