nodedb_vector/mmap_segment/
writer.rs1use std::path::Path;
6
7use super::format::{
8 DTYPE_F32, FOOTER_SIZE, FORMAT_VERSION, HEADER_SIZE, MAGIC, VectorSegmentCodec, vec_pad,
9};
10
11pub fn write_segment(
20 path: &Path,
21 dim: usize,
22 vectors: &[&[f32]],
23 surrogate_ids: &[u64],
24) -> std::io::Result<()> {
25 use std::io::Write as _;
26
27 debug_assert!(
28 surrogate_ids.is_empty() || surrogate_ids.len() == vectors.len(),
29 "surrogate_ids length must match vectors length or be empty"
30 );
31
32 if let Some(parent) = path.parent() {
33 std::fs::create_dir_all(parent)?;
34 }
35
36 let count = vectors.len() as u64;
37
38 let mut fd = std::fs::OpenOptions::new()
39 .read(true)
40 .write(true)
41 .create(true)
42 .truncate(true)
43 .open(path)?;
44
45 fd.write_all(&MAGIC)?;
47 fd.write_all(&FORMAT_VERSION.to_le_bytes())?;
48 fd.write_all(&0u16.to_le_bytes())?; fd.write_all(&(dim as u32).to_le_bytes())?;
50 fd.write_all(&count.to_le_bytes())?;
51 fd.write_all(&[DTYPE_F32])?;
52 fd.write_all(&[VectorSegmentCodec::None as u8])?;
53 fd.write_all(&[0u8; 10])?; let mut written_vec_bytes: usize = 0;
57 for v in vectors {
58 debug_assert_eq!(v.len(), dim, "vector dimension mismatch during write");
59 let bytes: &[u8] =
60 unsafe { std::slice::from_raw_parts(v.as_ptr() as *const u8, v.len() * 4) };
61 fd.write_all(bytes)?;
62 written_vec_bytes += bytes.len();
63 }
64
65 let pad = vec_pad(written_vec_bytes);
67 if pad > 0 {
68 fd.write_all(&[0u8; 8][..pad])?;
69 }
70
71 for i in 0..vectors.len() {
73 let sid: u64 = surrogate_ids.get(i).copied().unwrap_or(0);
74 fd.write_all(&sid.to_le_bytes())?;
75 }
76
77 fd.sync_all()?;
78 drop(fd);
79
80 let vec_bytes = dim
82 .checked_mul(vectors.len())
83 .and_then(|n| n.checked_mul(4))
84 .ok_or_else(|| {
85 std::io::Error::new(std::io::ErrorKind::InvalidData, "vector data size overflow")
86 })?;
87 let surrogate_bytes = vectors.len().checked_mul(8).ok_or_else(|| {
88 std::io::Error::new(
89 std::io::ErrorKind::InvalidData,
90 "surrogate block size overflow",
91 )
92 })?;
93 let pad_bytes = vec_pad(vec_bytes);
94 let body_len = HEADER_SIZE + vec_bytes + pad_bytes + surrogate_bytes;
95
96 let data = std::fs::read(path)?;
97 if data.len() != body_len {
98 return Err(std::io::Error::other(format!(
99 "unexpected file size after write: {} vs {body_len}",
100 data.len()
101 )));
102 }
103 let checksum = crc32c::crc32c(&data);
104
105 let mut fd = std::fs::OpenOptions::new().append(true).open(path)?;
107
108 let mut created_by = [0u8; 32];
109 let ver = env!("CARGO_PKG_VERSION").as_bytes();
110 let copy_len = ver.len().min(31);
111 created_by[..copy_len].copy_from_slice(&ver[..copy_len]);
112
113 fd.write_all(&FORMAT_VERSION.to_le_bytes())?; fd.write_all(&created_by)?; fd.write_all(&checksum.to_le_bytes())?; fd.write_all(&(FOOTER_SIZE as u32).to_le_bytes())?; fd.write_all(&MAGIC)?; fd.sync_all()?;
119
120 Ok(())
121}