use std::{
fs::File,
io::{Cursor, Read, Write},
path::Path,
};
use byteorder::{ByteOrder, LittleEndian};
use zstd::{Decoder, Encoder};
use super::{
BlockHeader, FileHeader,
header::{SIZE_BLOCK_HEADER, SIZE_HEADER},
};
use crate::error::{IndexError, Result};
/// Size in bytes of one serialized `BlockRange` record.
pub const SIZE_BLOCK_RANGE: usize = 32;
/// Size in bytes of the serialized `IndexHeader`.
pub const INDEX_HEADER_SIZE: usize = 32;
/// Magic number stored in the first 8 bytes of an `IndexHeader`.
///
/// Written little-endian, the bytes spell the ASCII tag `VBQINDEX`.
#[allow(clippy::unreadable_literal)]
pub const INDEX_MAGIC: u64 = 0x5845444e49514256;
/// End-of-index magic number.
///
/// Written little-endian, the bytes spell the ASCII tag `INDEXEND`.
/// NOTE(review): not referenced in this part of the file; presumably used by
/// the code that embeds the index — confirm against the caller.
#[allow(clippy::unreadable_literal)]
pub const INDEX_END_MAGIC: u64 = 0x444E455845444E49;
/// Filler value (four bytes of `42`) stored in the reserved tail of a `BlockRange`.
pub const INDEX_RESERVATION: [u8; 4] = [42; 4];
/// Position and record bookkeeping for one block, as stored in the index.
///
/// The serialized form is exactly [`SIZE_BLOCK_RANGE`] bytes, little-endian:
///
/// | bytes    | field                |
/// |----------|----------------------|
/// | `0..8`   | `start_offset`       |
/// | `8..16`  | `len`                |
/// | `16..20` | `block_records`      |
/// | `20..28` | `cumulative_records` |
/// | `28..32` | `reservation`        |
#[derive(Debug, Clone, Copy)]
pub struct BlockRange {
    /// Byte offset of the block; `BlockIndex::from_vbq` stores the position of
    /// the block header here.
    pub start_offset: u64,
    /// Block length in bytes; `BlockIndex::from_vbq` stores the size reported
    /// by the block header here.
    pub len: u64,
    /// Number of records held by this block.
    pub block_records: u32,
    /// Running record count; `BlockIndex::from_vbq` stores the total of all
    /// blocks that precede this one.
    pub cumulative_records: u64,
    /// Reserved tail bytes; [`INDEX_RESERVATION`] unless set by hand.
    pub reservation: [u8; 4],
}

impl BlockRange {
    /// Builds a range from its four meaningful fields, filling the reserved
    /// bytes with [`INDEX_RESERVATION`].
    #[must_use]
    pub fn new(start_offset: u64, len: u64, block_records: u32, cumulative_records: u64) -> Self {
        let reservation = INDEX_RESERVATION;
        Self {
            start_offset,
            len,
            block_records,
            cumulative_records,
            reservation,
        }
    }

    /// Serializes this range into [`SIZE_BLOCK_RANGE`] bytes and writes them
    /// to `writer` in a single `write_all` call.
    ///
    /// # Errors
    ///
    /// Returns the converted I/O error if the write fails.
    pub fn write_bytes<W: Write>(&self, writer: &mut W) -> Result<()> {
        let mut encoded = [0u8; SIZE_BLOCK_RANGE];
        {
            // Carve the record into its fixed-width fields, front to back.
            let (offset_field, rest) = encoded.split_at_mut(8);
            let (len_field, rest) = rest.split_at_mut(8);
            let (records_field, rest) = rest.split_at_mut(4);
            let (cumulative_field, reserved_field) = rest.split_at_mut(8);

            LittleEndian::write_u64(offset_field, self.start_offset);
            LittleEndian::write_u64(len_field, self.len);
            LittleEndian::write_u32(records_field, self.block_records);
            LittleEndian::write_u64(cumulative_field, self.cumulative_records);
            reserved_field.copy_from_slice(&self.reservation);
        }
        writer.write_all(&encoded)?;
        Ok(())
    }

    /// Decodes a range from an exactly-sized buffer.
    ///
    /// The reserved bytes in `buffer` are not read; the returned value always
    /// carries [`INDEX_RESERVATION`].
    #[must_use]
    pub fn from_exact(buffer: &[u8; SIZE_BLOCK_RANGE]) -> Self {
        let (offset_field, rest) = buffer.split_at(8);
        let (len_field, rest) = rest.split_at(8);
        let (records_field, rest) = rest.split_at(4);
        let cumulative_field = &rest[..8];

        Self::new(
            LittleEndian::read_u64(offset_field),
            LittleEndian::read_u64(len_field),
            LittleEndian::read_u32(records_field),
            LittleEndian::read_u64(cumulative_field),
        )
    }

    /// Decodes a range from a byte slice.
    ///
    /// # Panics
    ///
    /// Panics if `buffer.len()` is not exactly [`SIZE_BLOCK_RANGE`].
    #[must_use]
    pub fn from_bytes(buffer: &[u8]) -> Self {
        let mut exact = [0u8; SIZE_BLOCK_RANGE];
        exact.copy_from_slice(buffer);
        Self::from_exact(&exact)
    }
}
#[derive(Debug, Clone, Copy)]
pub struct IndexHeader {
magic: u64,
bytes: u64,
reserved: [u8; INDEX_HEADER_SIZE - 16],
}
impl IndexHeader {
pub fn new(bytes: u64) -> Self {
Self {
magic: INDEX_MAGIC,
bytes,
reserved: [42; INDEX_HEADER_SIZE - 16],
}
}
pub fn from_reader<R: Read>(reader: &mut R) -> Result<Self> {
let mut buffer = [0; INDEX_HEADER_SIZE];
reader.read_exact(&mut buffer)?;
let magic = LittleEndian::read_u64(&buffer[0..8]);
let bytes = LittleEndian::read_u64(&buffer[8..16]);
let Ok(reserved) = buffer[16..INDEX_HEADER_SIZE].try_into() else {
return Err(IndexError::InvalidReservedBytes.into());
};
if magic != INDEX_MAGIC {
return Err(IndexError::InvalidMagicNumber(magic).into());
}
Ok(Self {
magic,
bytes,
reserved,
})
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
let mut buffer = [0; INDEX_HEADER_SIZE];
buffer.copy_from_slice(&bytes[..INDEX_HEADER_SIZE]);
Self::from_reader(&mut Cursor::new(buffer))
}
pub fn write_bytes<W: Write>(&self, writer: &mut W) -> Result<()> {
let mut buffer = [0; INDEX_HEADER_SIZE];
LittleEndian::write_u64(&mut buffer[0..8], self.magic);
LittleEndian::write_u64(&mut buffer[8..16], self.bytes);
buffer[16..].copy_from_slice(&self.reserved);
writer.write_all(&buffer)?;
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct BlockIndex {
pub(crate) header: IndexHeader,
pub(crate) ranges: Vec<BlockRange>,
}
impl BlockIndex {
#[must_use]
pub fn new(header: IndexHeader) -> Self {
Self {
header,
ranges: Vec::default(),
}
}
#[must_use]
pub fn n_blocks(&self) -> usize {
self.ranges.len()
}
pub fn write_bytes<W: Write>(&self, writer: &mut W) -> Result<()> {
self.header.write_bytes(writer)?;
let mut writer = Encoder::new(writer, 3)?.auto_finish();
self.write_range(&mut writer)?;
writer.flush()?;
Ok(())
}
pub fn write_range<W: Write>(&self, writer: &mut W) -> Result<()> {
self.ranges
.iter()
.filter(|range| range.block_records > 0)
.try_for_each(|range| -> Result<()> { range.write_bytes(writer) })
}
fn add_range(&mut self, range: BlockRange) {
self.ranges.push(range);
}
pub fn from_vbq<P: AsRef<Path>>(path: P) -> Result<Self> {
let file = File::open(path)?;
let mmap = unsafe { memmap2::Mmap::map(&file)? };
let file_size = mmap.len();
let _header = {
let mut header_bytes = [0u8; SIZE_HEADER];
header_bytes.copy_from_slice(&mmap[..SIZE_HEADER]);
FileHeader::from_bytes(&header_bytes)?
};
let mut pos = SIZE_HEADER;
let index_header = IndexHeader::new(file_size as u64);
let mut index = BlockIndex::new(index_header);
let mut record_total = 0;
while pos < mmap.len() {
let block_header = {
let mut header_bytes = [0u8; SIZE_BLOCK_HEADER];
header_bytes.copy_from_slice(&mmap[pos..pos + SIZE_BLOCK_HEADER]);
BlockHeader::from_bytes(&header_bytes)?
};
index.add_range(BlockRange::new(
pos as u64,
block_header.size,
block_header.records,
record_total,
));
pos += SIZE_BLOCK_HEADER + block_header.size as usize;
record_total += u64::from(block_header.records);
}
Ok(index)
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
let index_header = IndexHeader::from_bytes(bytes)?;
let buffer = {
let mut buffer = Vec::new();
let mut decoder = Decoder::new(Cursor::new(&bytes[INDEX_HEADER_SIZE..]))?;
decoder.read_to_end(&mut buffer)?;
buffer
};
let mut ranges = Self::new(index_header);
let mut pos = 0;
while pos < buffer.len() {
let bound = pos + SIZE_BLOCK_RANGE;
let range = BlockRange::from_bytes(&buffer[pos..bound]);
ranges.add_range(range);
pos += SIZE_BLOCK_RANGE;
}
Ok(ranges)
}
#[must_use]
pub fn ranges(&self) -> &[BlockRange] {
&self.ranges
}
pub fn pprint(&self) {
self.ranges.iter().for_each(|range| {
println!(
"{}\t{}\t{}\t{}",
range.start_offset, range.len, range.block_records, range.cumulative_records
);
});
}
#[must_use]
pub fn num_records(&self) -> usize {
self.ranges
.iter()
.next_back()
.map(|r| (r.cumulative_records + u64::from(r.block_records)) as usize)
.unwrap_or_default()
}
}