/// Footer length in bytes.
///
/// Equals the size of the trailer emitted by `write_dense_toc_and_footer`:
/// a `u64` TOC offset, a `u32` entry count and a `u32` magic.
pub const FOOTER_SIZE: u64 = 8 + 4 + 4;

/// Magic number that closes the dense-vector footer.
///
/// It is written little-endian, so the bytes on disk are ASCII `VEC2`.
pub const VECTORS_FOOTER_MAGIC: u32 = u32::from_le_bytes(*b"VEC2");

/// Magic number for the flat binary format.
///
/// Read from most- to least-significant byte, the value is ASCII `FVD3`.
/// NOTE(review): the code that reads/writes this magic is not in this part of
/// the file, so its on-disk byte order is not confirmed here.
pub const FLAT_BINARY_MAGIC: u32 = 0x4656_4433;

/// Header length in bytes for the flat binary format.
///
/// NOTE(review): the header layout is defined elsewhere — confirm that it
/// still adds up to 16 bytes when changing it.
pub const FLAT_BINARY_HEADER_SIZE: usize = 16;

/// Byte length of one doc-id entry: one `u32` followed by one `u16` (6 bytes).
pub const DOC_ID_ENTRY_SIZE: usize =
    std::mem::size_of::<u32>() + std::mem::size_of::<u16>();
/// One row of the dense-vector table of contents.
///
/// `write_dense_toc_and_footer` serializes the fields in declaration order,
/// little-endian, and `read_dense_toc` decodes them in the same order
/// (21 bytes per entry).
pub struct DenseVectorTocEntry {
    /// Identifier of the field this row belongs to.
    pub field_id: u32,
    /// Index-type tag stored as a raw byte; the meaning of individual values
    /// is defined outside this part of the file.
    pub index_type: u8,
    /// NOTE(review): presumably the byte offset of the field's index data
    /// within the file — confirm against the code that builds these entries.
    pub offset: u64,
    /// NOTE(review): presumably the byte length of the data starting at
    /// `offset` — confirm against the code that builds these entries.
    pub size: u64,
}
/// Serialized size in bytes of one `DenseVectorTocEntry` (21).
///
/// Mirrors the on-disk fields: `field_id: u32`, `index_type: u8`,
/// `offset: u64`, `size: u64`. This is the packed wire size, not the
/// in-memory size of the struct.
pub const DENSE_TOC_ENTRY_SIZE: u64 = (std::mem::size_of::<u32>()
    + std::mem::size_of::<u8>()
    + 2 * std::mem::size_of::<u64>()) as u64;
/// Serializes the dense-vector table of contents followed by its footer.
///
/// All integers are written little-endian. Each entry is emitted as
/// `field_id: u32`, `index_type: u8`, `offset: u64`, `size: u64` (21 bytes).
/// The footer that follows is `toc_offset: u64`, the entry count as `u32`,
/// and `VECTORS_FOOTER_MAGIC` (16 bytes).
///
/// # Errors
///
/// Returns an `InvalidInput` error — before anything is written — when
/// `entries` holds more items than the footer's `u32` count can represent.
/// Errors reported by `writer` are propagated unchanged; in that case part of
/// the output may already have been written.
pub fn write_dense_toc_and_footer(
    writer: &mut (impl std::io::Write + ?Sized),
    toc_offset: u64,
    entries: &[DenseVectorTocEntry],
) -> std::io::Result<()> {
    use byteorder::{LittleEndian, WriteBytesExt};
    // Explicit import keeps this compiling on editions whose prelude lacks `TryFrom`.
    use std::convert::TryFrom;

    // A plain `as u32` cast would silently wrap and produce a footer whose
    // count disagrees with the TOC that precedes it.
    let entry_count = u32::try_from(entries.len()).map_err(|_| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!(
                "dense TOC entry count {} does not fit in the u32 footer field",
                entries.len()
            ),
        )
    })?;

    for entry in entries {
        writer.write_u32::<LittleEndian>(entry.field_id)?;
        writer.write_u8(entry.index_type)?;
        writer.write_u64::<LittleEndian>(entry.offset)?;
        writer.write_u64::<LittleEndian>(entry.size)?;
    }

    // Fixed-size footer.
    writer.write_u64::<LittleEndian>(toc_offset)?;
    writer.write_u32::<LittleEndian>(entry_count)?;
    writer.write_u32::<LittleEndian>(VECTORS_FOOTER_MAGIC)?;
    Ok(())
}
/// Decodes `num_fields` dense-vector TOC entries from the start of `toc_bytes`.
///
/// Each entry is read little-endian as `field_id: u32`, `index_type: u8`,
/// `offset: u64`, `size: u64` — the layout produced by
/// `write_dense_toc_and_footer`. Bytes after the last requested entry are
/// ignored.
///
/// # Errors
///
/// Returns the underlying read error (`UnexpectedEof`) when `toc_bytes` is too
/// short to hold `num_fields` complete entries.
pub fn read_dense_toc(
    toc_bytes: &[u8],
    num_fields: u32,
) -> std::io::Result<Vec<DenseVectorTocEntry>> {
    use byteorder::{LittleEndian, ReadBytesExt};

    let mut cursor = std::io::Cursor::new(toc_bytes);

    // `num_fields` is caller-supplied (presumably decoded from a file footer)
    // and may be corrupt, so it must not drive the allocation size on its own.
    // Every entry consumes at least one input byte, hence the buffer length is
    // an upper bound on how many entries can possibly be decoded.
    let capacity = (num_fields as usize).min(toc_bytes.len());
    let mut entries = Vec::with_capacity(capacity);

    for _ in 0..num_fields {
        // Field initializers run in source order, which is the wire order.
        // A short read on any field — including `index_type` — is an error.
        entries.push(DenseVectorTocEntry {
            field_id: cursor.read_u32::<LittleEndian>()?,
            index_type: cursor.read_u8()?,
            offset: cursor.read_u64::<LittleEndian>()?,
            size: cursor.read_u64::<LittleEndian>()?,
        });
    }
    Ok(entries)
}
/// Magic number that closes the sparse-vector footer.
///
/// It is written little-endian by `write_sparse_toc_and_footer`, so the bytes
/// on disk are ASCII `SPR4`.
pub const SPARSE_FOOTER_MAGIC: u32 = u32::from_le_bytes(*b"SPR4");

/// Magic number for the V13 BMP blob.
///
/// Its little-endian byte encoding is ASCII `BMP3`.
/// NOTE(review): the code that reads/writes this magic is not in this part of
/// the file, so its on-disk byte order is not confirmed here.
pub const BMP_BLOB_MAGIC_V13: u32 = u32::from_le_bytes(*b"BMP3");

/// Footer length in bytes of the V13 BMP blob.
///
/// NOTE(review): the footer layout is defined elsewhere — confirm that it
/// still adds up to 64 bytes when changing it.
pub const BMP_BLOB_FOOTER_SIZE_V13: usize = 64;

/// Sparse-vector footer length in bytes.
///
/// Equals the size of the trailer emitted by `write_sparse_toc_and_footer`:
/// a `u64` skip offset, a `u64` TOC offset, a `u32` field count and a `u32`
/// magic.
pub const SPARSE_FOOTER_SIZE: u64 = 8 + 8 + 4 + 4;
/// TOC record for a single dimension of a sparse-vector field.
///
/// `write_sparse_toc_and_footer` serializes the fields in declaration order,
/// little-endian (28 bytes per record).
pub struct SparseDimTocEntry {
    /// Identifier of the dimension.
    pub dim_id: u32,
    /// NOTE(review): presumably the byte offset of this dimension's block
    /// data — confirm against the code that builds these entries.
    pub block_data_offset: u64,
    /// NOTE(review): presumably the index of this dimension's first entry in
    /// the skip section — confirm against the code that builds these entries.
    pub skip_start: u32,
    /// NOTE(review): presumably the number of blocks stored for this
    /// dimension.
    pub num_blocks: u32,
    /// NOTE(review): presumably the number of documents that have this
    /// dimension.
    pub doc_count: u32,
    /// NOTE(review): presumably the largest weight stored for this dimension.
    pub max_weight: f32,
}
/// TOC section for one sparse-vector field: a small header plus one
/// `SparseDimTocEntry` per dimension.
///
/// `write_sparse_toc_and_footer` serializes it as `field_id`, `quantization`,
/// the number of `dims`, `total_vectors`, and then every dimension record.
pub struct SparseFieldToc {
    /// Identifier of the field this section belongs to.
    pub field_id: u32,
    /// Quantization tag stored as a raw byte; the meaning of individual values
    /// is defined outside this part of the file.
    pub quantization: u8,
    /// NOTE(review): presumably the total number of vectors stored for the
    /// field — confirm against the code that builds this struct.
    pub total_vectors: u32,
    /// Per-dimension records, written in this order.
    pub dims: Vec<SparseDimTocEntry>,
}
/// Serializes the sparse-vector table of contents followed by its footer.
///
/// All values are written little-endian. For every field the output holds a
/// header — `field_id: u32`, `quantization: u8`, dimension count `u32`,
/// `total_vectors: u32` — and then one 28-byte record per dimension:
/// `dim_id: u32`, `block_data_offset: u64`, `skip_start: u32`,
/// `num_blocks: u32`, `doc_count: u32`, `max_weight: f32`.
/// The footer that follows is `skip_offset: u64`, `toc_offset: u64`, the field
/// count as `u32`, and `SPARSE_FOOTER_MAGIC` (24 bytes).
///
/// # Errors
///
/// Returns an `InvalidInput` error — before anything is written — when the
/// number of fields, or the number of dimensions of any field, does not fit in
/// the `u32` used to store it. Errors reported by `writer` are propagated
/// unchanged; in that case part of the output may already have been written.
pub fn write_sparse_toc_and_footer(
    writer: &mut (impl std::io::Write + ?Sized),
    skip_offset: u64,
    toc_offset: u64,
    field_tocs: &[SparseFieldToc],
) -> std::io::Result<()> {
    use byteorder::{LittleEndian, WriteBytesExt};
    // Explicit import keeps this compiling on editions whose prelude lacks `TryFrom`.
    use std::convert::TryFrom;

    // A plain `as u32` cast would silently wrap and store a count that
    // disagrees with the records actually written.
    let count_as_u32 = |len: usize, what: &str| -> std::io::Result<u32> {
        u32::try_from(len).map_err(|_| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!("{} count {} does not fit in its u32 TOC field", what, len),
            )
        })
    };

    // Validate every count up front so invalid input never yields partial output.
    let field_count = count_as_u32(field_tocs.len(), "sparse field")?;
    let dim_counts = field_tocs
        .iter()
        .map(|ftoc| count_as_u32(ftoc.dims.len(), "sparse dimension"))
        .collect::<std::io::Result<Vec<u32>>>()?;

    for (ftoc, &dim_count) in field_tocs.iter().zip(&dim_counts) {
        // Per-field header.
        writer.write_u32::<LittleEndian>(ftoc.field_id)?;
        writer.write_u8(ftoc.quantization)?;
        writer.write_u32::<LittleEndian>(dim_count)?;
        writer.write_u32::<LittleEndian>(ftoc.total_vectors)?;

        // Per-dimension records.
        for dim in &ftoc.dims {
            writer.write_u32::<LittleEndian>(dim.dim_id)?;
            writer.write_u64::<LittleEndian>(dim.block_data_offset)?;
            writer.write_u32::<LittleEndian>(dim.skip_start)?;
            writer.write_u32::<LittleEndian>(dim.num_blocks)?;
            writer.write_u32::<LittleEndian>(dim.doc_count)?;
            writer.write_f32::<LittleEndian>(dim.max_weight)?;
        }
    }

    // Fixed-size footer.
    writer.write_u64::<LittleEndian>(skip_offset)?;
    writer.write_u64::<LittleEndian>(toc_offset)?;
    writer.write_u32::<LittleEndian>(field_count)?;
    writer.write_u32::<LittleEndian>(SPARSE_FOOTER_MAGIC)?;
    Ok(())
}