use crate::constants::CHUNK_TYPE_INDEX;
use crate::error::{Error, Result};
const S2_INDEX_HEADER: &[u8] = b"s2idx\x00";
const S2_INDEX_TRAILER: &[u8] = b"\x00xdi2s";
const MAX_INDEX_ENTRIES: usize = 1 << 16;
const MIN_INDEX_DIST: i64 = 1 << 20; const SKIPPABLE_FRAME_HEADER: usize = 4;
#[derive(Debug, Clone, Copy)]
struct IndexEntry {
compressed_offset: i64,
uncompressed_offset: i64,
}
#[derive(Debug, Clone)]
pub struct Index {
pub total_uncompressed: i64,
pub total_compressed: i64,
info: Vec<IndexEntry>,
est_block_uncomp: i64,
}
impl Index {
pub fn new() -> Self {
Index {
total_uncompressed: -1,
total_compressed: -1,
info: Vec::new(),
est_block_uncomp: 0,
}
}
pub fn reset(&mut self, max_block: i64) {
self.est_block_uncomp = max_block;
self.total_compressed = -1;
self.total_uncompressed = -1;
self.info.clear();
}
pub fn add(&mut self, compressed_offset: i64, uncompressed_offset: i64) -> Result<()> {
if let Some(&last) = self.info.last() {
if last.uncompressed_offset == uncompressed_offset {
if let Some(entry) = self.info.last_mut() {
entry.compressed_offset = compressed_offset;
}
return Ok(());
}
if last.uncompressed_offset > uncompressed_offset {
return Err(Error::Corrupt);
}
if last.compressed_offset > compressed_offset {
return Err(Error::Corrupt);
}
if last.uncompressed_offset + MIN_INDEX_DIST > uncompressed_offset {
return Ok(());
}
}
self.info.push(IndexEntry {
compressed_offset,
uncompressed_offset,
});
Ok(())
}
pub fn find(&self, offset: i64) -> Result<(i64, i64)> {
if self.total_uncompressed < 0 {
return Err(Error::Corrupt);
}
let offset = if offset < 0 {
let offset = self.total_uncompressed + offset;
if offset < 0 {
return Err(Error::InvalidInput("offset before start".to_string()));
}
offset
} else {
offset
};
if offset > self.total_uncompressed {
return Err(Error::InvalidInput("offset beyond end".to_string()));
}
if self.info.len() > 200 {
let idx = match self
.info
.binary_search_by_key(&offset, |e| e.uncompressed_offset)
{
Ok(i) => i,
Err(i) => {
if i == 0 {
0
} else {
i - 1
}
}
};
let entry = self.info[idx];
return Ok((entry.compressed_offset, entry.uncompressed_offset));
}
let mut compressed_off = 0;
let mut uncompressed_off = 0;
for entry in &self.info {
if entry.uncompressed_offset > offset {
break;
}
compressed_off = entry.compressed_offset;
uncompressed_off = entry.uncompressed_offset;
}
Ok((compressed_off, uncompressed_off))
}
fn reduce(&mut self) {
if self.info.len() < MAX_INDEX_ENTRIES && self.est_block_uncomp >= MIN_INDEX_DIST {
return;
}
let mut remove_n = (self.info.len() + 1) / MAX_INDEX_ENTRIES;
while self.est_block_uncomp * (remove_n as i64 + 1) < MIN_INDEX_DIST
&& self.info.len() / (remove_n + 1) > 1000
{
remove_n += 1;
}
let mut j = 0;
let mut idx = 0;
while idx < self.info.len() {
self.info[j] = self.info[idx];
j += 1;
idx += remove_n + 1;
}
self.info.truncate(j);
self.est_block_uncomp += self.est_block_uncomp * remove_n as i64;
}
pub fn append_to(
&mut self,
buf: &mut Vec<u8>,
uncomp_total: i64,
comp_total: i64,
) -> Result<()> {
self.reduce();
let init_size = buf.len();
buf.push(CHUNK_TYPE_INDEX);
buf.extend_from_slice(&[0, 0, 0]);
buf.extend_from_slice(S2_INDEX_HEADER);
encode_varint(buf, uncomp_total);
encode_varint(buf, comp_total);
encode_varint(buf, self.est_block_uncomp);
encode_varint(buf, self.info.len() as i64);
let mut has_uncompressed = 0u8;
for (idx, entry) in self.info.iter().enumerate() {
if idx == 0 {
if entry.uncompressed_offset != 0 {
has_uncompressed = 1;
break;
}
continue;
}
let expected = self.info[idx - 1].uncompressed_offset + self.est_block_uncomp;
if entry.uncompressed_offset != expected {
has_uncompressed = 1;
break;
}
}
buf.push(has_uncompressed);
if has_uncompressed == 1 {
for (idx, entry) in self.info.iter().enumerate() {
let u_off = if idx == 0 {
entry.uncompressed_offset
} else {
let prev = self.info[idx - 1];
entry.uncompressed_offset - prev.uncompressed_offset - self.est_block_uncomp
};
encode_varint(buf, u_off);
}
}
let mut c_predict = self.est_block_uncomp / 2;
for (idx, entry) in self.info.iter().enumerate() {
let c_off = if idx == 0 {
entry.compressed_offset
} else {
let prev = self.info[idx - 1];
let off = entry.compressed_offset - prev.compressed_offset - c_predict;
c_predict += off / 2;
off
};
encode_varint(buf, c_off);
}
let total_size = (buf.len() - init_size + 4 + S2_INDEX_TRAILER.len()) as u32;
buf.extend_from_slice(&total_size.to_le_bytes());
buf.extend_from_slice(S2_INDEX_TRAILER);
let chunk_len = buf.len() - init_size - SKIPPABLE_FRAME_HEADER;
buf[init_size + 1] = (chunk_len & 0xff) as u8;
buf[init_size + 2] = ((chunk_len >> 8) & 0xff) as u8;
buf[init_size + 3] = ((chunk_len >> 16) & 0xff) as u8;
Ok(())
}
pub fn load<'a>(&mut self, data: &'a [u8]) -> Result<&'a [u8]> {
if data.len() <= 4 + S2_INDEX_HEADER.len() + S2_INDEX_TRAILER.len() {
return Err(Error::BufferTooSmall);
}
if data[0] != CHUNK_TYPE_INDEX {
return Err(Error::Corrupt);
}
let chunk_len = data[1] as usize | (data[2] as usize) << 8 | (data[3] as usize) << 16;
let mut b = &data[4..];
if b.len() < chunk_len {
return Err(Error::BufferTooSmall);
}
if !b.starts_with(S2_INDEX_HEADER) {
return Err(Error::Unsupported);
}
b = &b[S2_INDEX_HEADER.len()..];
let (v, n) = decode_varint(b)?;
if v < 0 {
return Err(Error::Corrupt);
}
self.total_uncompressed = v;
b = &b[n..];
let (v, n) = decode_varint(b)?;
self.total_compressed = v;
b = &b[n..];
let (v, n) = decode_varint(b)?;
if v < 0 {
return Err(Error::Corrupt);
}
self.est_block_uncomp = v;
b = &b[n..];
let (v, n) = decode_varint(b)?;
if v < 0 || v > MAX_INDEX_ENTRIES as i64 {
return Err(Error::Corrupt);
}
let entries = v as usize;
b = &b[n..];
self.info.clear();
self.info.reserve(entries);
if b.is_empty() {
return Err(Error::Corrupt);
}
let has_uncompressed = b[0];
b = &b[1..];
let mut uncomp_offsets = Vec::with_capacity(entries);
if has_uncompressed == 1 {
for i in 0..entries {
let (v, n) = decode_varint(b)?;
b = &b[n..];
let uncomp_off = if i == 0 {
v
} else {
uncomp_offsets[i - 1] + self.est_block_uncomp + v
};
uncomp_offsets.push(uncomp_off);
}
} else {
for i in 0..entries {
uncomp_offsets.push(i as i64 * self.est_block_uncomp);
}
}
let mut c_predict = self.est_block_uncomp / 2;
for (i, &uncomp_off) in uncomp_offsets.iter().enumerate() {
let (v, n) = decode_varint(b)?;
b = &b[n..];
let comp_off = if i == 0 {
v
} else {
let prev_comp = self.info[i - 1].compressed_offset;
c_predict += v / 2;
prev_comp + c_predict + v
};
self.info.push(IndexEntry {
compressed_offset: comp_off,
uncompressed_offset: uncomp_off,
});
}
if b.len() < 4 + S2_INDEX_TRAILER.len() {
return Err(Error::Corrupt);
}
let total_size_pos = b.len() - S2_INDEX_TRAILER.len() - 4;
let trailer_pos = b.len() - S2_INDEX_TRAILER.len();
if &b[trailer_pos..trailer_pos + S2_INDEX_TRAILER.len()] != S2_INDEX_TRAILER {
return Err(Error::Corrupt);
}
let remaining = &b[total_size_pos + 4 + S2_INDEX_TRAILER.len()..];
Ok(remaining)
}
}
impl Default for Index {
fn default() -> Self {
Self::new()
}
}
fn encode_varint(buf: &mut Vec<u8>, value: i64) {
let uvalue = ((value << 1) ^ (value >> 63)) as u64;
let mut v = uvalue;
while v >= 0x80 {
buf.push((v as u8) | 0x80);
v >>= 7;
}
buf.push(v as u8);
}
fn decode_varint(data: &[u8]) -> Result<(i64, usize)> {
let mut result: u64 = 0;
let mut shift = 0;
let mut i = 0;
while i < data.len() && i < 10 {
let byte = data[i];
result |= ((byte & 0x7f) as u64) << shift;
i += 1;
if byte < 0x80 {
let value = ((result >> 1) as i64) ^ -((result & 1) as i64);
return Ok((value, i));
}
shift += 7;
}
Err(Error::Corrupt)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_index_basic() {
let mut index = Index::new();
index.reset(1024 * 1024); index.total_uncompressed = 10 * 1024 * 1024;
assert!(index.add(0, 0).is_ok());
assert!(index.add(500_000, 1024 * 1024).is_ok()); assert!(index.add(1_000_000, 2 * 1024 * 1024).is_ok()); assert!(index.add(1_500_000, 3 * 1024 * 1024).is_ok());
let (c, u) = index.find(0).unwrap();
assert_eq!(c, 0);
assert_eq!(u, 0);
let (c, u) = index.find(1024 * 1024).unwrap();
assert_eq!(c, 500_000);
assert_eq!(u, 1024 * 1024);
let (c, u) = index.find(1024 * 1024 + 500_000).unwrap();
assert_eq!(c, 500_000);
assert_eq!(u, 1024 * 1024);
}
#[test]
fn test_varint_roundtrip() {
let test_values = vec![0, 1, -1, 127, -127, 128, -128, 65535, -65535];
for &val in &test_values {
let mut buf = Vec::new();
encode_varint(&mut buf, val);
let (decoded, n) = decode_varint(&buf).unwrap();
assert_eq!(decoded, val, "Failed for value {}", val);
assert_eq!(n, buf.len());
}
}
}