use std::io;
use std::io::Read;
use std::path::Path;
/// Offset index parsed from GZI data.
///
/// The serialized form read by [`GziIndex::from_bytes`] is a little-endian
/// `u64` entry count followed by that many pairs of little-endian `u64`
/// values, stored here as `(compressed_offset, uncompressed_offset)`.
#[derive(Debug, Clone)]
pub struct GziIndex
{
    /// `(compressed_offset, uncompressed_offset)` pairs, in the order they
    /// appear in the serialized index.
    pub entries: Vec<(u64, u64)>,
}

impl GziIndex
{
    /// Reads the whole file at `path` into memory and parses it with
    /// [`GziIndex::from_bytes`].
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while opening or reading the file, or
    /// the `InvalidData` error produced by [`GziIndex::from_bytes`].
    pub fn from_path(path: &Path) -> io::Result<Self>
    {
        let mut file = std::fs::File::open(path)?;
        let mut buffer = Vec::new();
        file.read_to_end(&mut buffer)?;
        Self::from_bytes(&buffer)
    }

    /// Parses a serialized index from `buffer`.
    ///
    /// Bytes after the last declared entry are ignored.
    ///
    /// # Errors
    ///
    /// Returns `io::ErrorKind::InvalidData` when:
    /// * `buffer` is shorter than the 8-byte entry count,
    /// * `buffer` is shorter than the declared entry count requires
    ///   (including counts so large that the required size would not fit in
    ///   memory at all),
    /// * the uncompressed offsets are not in non-decreasing order, or
    /// * the compressed offsets are not in non-decreasing order.
    pub fn from_bytes(buffer: &[u8]) -> io::Result<Self>
    {
        const HEADER_LEN: usize = 8;
        const ENTRY_LEN: usize = 16;

        /// Decodes a little-endian `u64` from exactly eight bytes.
        fn read_u64_le(bytes: &[u8]) -> u64
        {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(bytes);
            u64::from_le_bytes(raw)
        }

        if buffer.len() < HEADER_LEN
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "GZI data too short (less than 8 bytes)",
            ));
        }
        let (header, payload) = buffer.split_at(HEADER_LEN);
        let declared = read_u64_le(header);

        // The count comes straight from the input, so the size arithmetic is
        // done in u128 where `8 + count * 16` cannot overflow for any u64
        // count. A hostile header is reported as an error instead of
        // overflowing or requesting an impossible allocation.
        let expected_size = HEADER_LEN as u128 + u128::from(declared) * ENTRY_LEN as u128;
        if (buffer.len() as u128) < expected_size
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!(
                    "GZI data too short: expected {} bytes, got {}",
                    expected_size,
                    buffer.len()
                ),
            ));
        }
        // Lossless: the check above proves `declared * 16 <= payload.len()`.
        let num_entries = declared as usize;

        let entries: Vec<(u64, u64)> = payload[..num_entries * ENTRY_LEN]
            .chunks_exact(ENTRY_LEN)
            .map(|record| {
                let (compressed, uncompressed) = record.split_at(8);
                (read_u64_le(compressed), read_u64_le(uncompressed))
            })
            .collect();

        // Both lookup methods binary-search this table, one per column, so
        // each column has to be ordered for the results to be meaningful.
        if entries.windows(2).any(|pair| pair[1].1 < pair[0].1)
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "GZI entries not sorted by uncompressed offset",
            ));
        }
        if entries.windows(2).any(|pair| pair[1].0 < pair[0].0)
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "GZI entries not sorted by compressed offset",
            ));
        }

        Ok(GziIndex { entries })
    }

    /// Maps an uncompressed offset to the compressed offset of an index entry.
    ///
    /// Returns the compressed offset of the entry whose uncompressed offset
    /// equals `uncompressed_offset`, or otherwise of the closest entry that
    /// precedes it. A query smaller than every indexed uncompressed offset
    /// yields the first entry's compressed offset. Returns `None` only when
    /// the index is empty.
    ///
    /// NOTE(review): for a query below the first entry this returns the first
    /// entry, whereas `get_uncompressed_offset` returns `None` in the mirror
    /// case. Behaviour is kept as-is; confirm which one is intended.
    pub fn get_compressed_offset(&self, uncompressed_offset: u64) -> Option<u64>
    {
        let slot = match self
            .entries
            .binary_search_by_key(&uncompressed_offset, |&(_, unc)| unc)
        {
            Ok(hit) => hit,
            // Step back to the preceding entry; clamp to the first entry
            // when the query lies before all of them.
            Err(insert_at) => insert_at.saturating_sub(1),
        };
        // `get` yields `None` for the empty index (slot 0 does not exist).
        self.entries.get(slot).map(|&(compressed, _)| compressed)
    }

    /// Maps a compressed offset to the uncompressed offset of an index entry.
    ///
    /// Returns the uncompressed offset of the entry whose compressed offset
    /// equals `compressed_offset`, or otherwise of the closest entry that
    /// precedes it. Returns `None` when the index is empty or when
    /// `compressed_offset` is smaller than every indexed compressed offset.
    pub fn get_uncompressed_offset(&self, compressed_offset: u64) -> Option<u64>
    {
        let slot = match self
            .entries
            .binary_search_by_key(&compressed_offset, |&(comp, _)| comp)
        {
            Ok(hit) => hit,
            // No entry at or before the query: nothing to report.
            Err(insert_at) => insert_at.checked_sub(1)?,
        };
        self.entries.get(slot).map(|&(_, uncompressed)| uncompressed)
    }

    /// Number of entries in the index.
    pub fn len(&self) -> usize
    {
        self.entries.len()
    }

    /// Returns `true` when the index holds no entries.
    pub fn is_empty(&self) -> bool
    {
        self.entries.is_empty()
    }

    /// Borrows the `(compressed_offset, uncompressed_offset)` pairs.
    pub fn entries(&self) -> &[(u64, u64)]
    {
        &self.entries
    }
}
#[cfg(test)]
mod tests
{
    use super::*;

    /// A one-entry index parses and resolves an exact uncompressed-offset hit.
    #[test]
    fn test_from_bytes()
    {
        // Serialized layout: entry count (1), then one
        // (compressed = 100, uncompressed = 0) pair, all little-endian u64.
        let mut raw: Vec<u8> = Vec::with_capacity(24);
        for word in [1u64, 100, 0].iter()
        {
            raw.extend_from_slice(&word.to_le_bytes());
        }

        let parsed = GziIndex::from_bytes(&raw).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed.get_compressed_offset(0), Some(100));
    }
}