use flate2::write::GzEncoder;
use flate2::Compression;
use std::io::Write;
/// Builds an index over a deterministic pseudo-random payload and checks
/// the recorded total size plus strict ordering of checkpoint offsets.
#[test]
fn test_build_and_seek_random_data() {
    // ~200 KiB of deterministic, poorly-compressible bytes.
    let payload: Vec<u8> = (0..200 * 1024)
        .map(|i| ((i ^ (i >> 8) ^ (i >> 16)) % 256) as u8)
        .collect();

    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&payload).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 10 * 1024)
        .expect("Failed to build index");

    assert_eq!(
        index.total_uncompressed_size,
        payload.len() as u64,
        "Index total size mismatch"
    );

    // Every checkpoint must sit strictly after its predecessor.
    for pair in index.points.windows(2) {
        assert!(
            pair[1].uncompressed_offset > pair[0].uncompressed_offset,
            "Checkpoints should be monotonically increasing"
        );
    }
}
/// Seeking to the very first checkpoint must reproduce the remainder of
/// the original data exactly.
#[test]
fn test_seek_at_first_checkpoint() {
    let data: Vec<u8> = (0..100 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 10 * 1024)
        .expect("Failed to build index");

    if let Some(first) = index.points.first() {
        let offset = first.uncompressed_offset;
        let mut out = Vec::new();
        crate::decompress::index::seek_decompress(&compressed, &index, offset, u64::MAX, &mut out)
            .expect("Seek decompress failed");
        assert_eq!(
            &out[..],
            &data[offset as usize..],
            "Seek to first checkpoint should match"
        );
    }
}
/// Seeking to the second checkpoint (when one exists) must reproduce the
/// data from that offset to EOF.
#[test]
fn test_seek_at_second_checkpoint() {
    let data: Vec<u8> = (0..200 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 30 * 1024)
        .expect("Failed to build index");

    // Only meaningful when the index actually produced a second checkpoint.
    if let Some(second) = index.points.get(1) {
        let offset = second.uncompressed_offset;
        let mut out = Vec::new();
        crate::decompress::index::seek_decompress(&compressed, &index, offset, u64::MAX, &mut out)
            .expect("Seek decompress failed");
        assert_eq!(
            &out[..],
            &data[offset as usize..],
            "Seek to second checkpoint should match"
        );
    }
}
/// Serializes an index and loads it back, checking that every header
/// field and every checkpoint survives the round trip unchanged.
#[test]
fn test_index_serialize_roundtrip() {
    let data: Vec<u8> = (0..50 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let original = crate::decompress::index::build_index(&compressed, 10 * 1024)
        .expect("Failed to build index");

    let mut bytes = Vec::new();
    crate::decompress::index::serialize_index(&original, &mut bytes)
        .expect("Failed to serialize index");
    let restored = crate::decompress::index::load_index(&bytes).expect("Failed to load index");

    assert_eq!(
        original.total_uncompressed_size, restored.total_uncompressed_size,
        "Total size mismatch"
    );
    assert_eq!(
        original.deflate_offset, restored.deflate_offset,
        "Deflate offset mismatch"
    );
    assert_eq!(
        original.points.len(),
        restored.points.len(),
        "Point count mismatch"
    );

    // Field-by-field comparison of every checkpoint pair.
    let pairs = original.points.iter().zip(restored.points.iter());
    for (i, (a, b)) in pairs.enumerate() {
        assert_eq!(
            a.compressed_bit_offset, b.compressed_bit_offset,
            "Point {} compressed offset mismatch",
            i
        );
        assert_eq!(
            a.uncompressed_offset, b.uncompressed_offset,
            "Point {} uncompressed offset mismatch",
            i
        );
        assert_eq!(&a.window[..], &b.window[..], "Point {} window mismatch", i);
    }
}
/// Concatenated gzip members are unsupported: the builder must error
/// rather than silently index only the first member.
#[test]
fn test_build_index_multi_member() {
    // Compress one buffer into a standalone gzip member.
    fn gzip(bytes: &[u8]) -> Vec<u8> {
        let mut enc = GzEncoder::new(Vec::new(), Compression::default());
        enc.write_all(bytes).unwrap();
        enc.finish().unwrap()
    }

    let part1: Vec<u8> = (0..50_000).map(|i| (i % 256) as u8).collect();
    let part2: Vec<u8> = (0..50_000).map(|i| ((i + 50) % 256) as u8).collect();

    // Two independent members glued back-to-back.
    let mut multi = gzip(&part1);
    multi.extend_from_slice(&gzip(&part2));

    let result = crate::decompress::index::build_index(&multi, 10 * 1024);
    assert!(result.is_err(), "Should reject multi-member gzip files");
}
/// Passing a byte cap to `seek_decompress` must yield exactly that many
/// bytes, matching the corresponding slice of the original data.
#[test]
fn test_seek_with_max_bytes_limit() {
    let data: Vec<u8> = (0..100 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 10 * 1024)
        .expect("Failed to build index");

    if let Some(first) = index.points.first() {
        let start = first.uncompressed_offset;
        let cap = 10 * 1024;
        let mut out = Vec::new();
        let produced = crate::decompress::index::seek_decompress(
            &compressed,
            &index,
            start,
            cap as u64,
            &mut out,
        )
        .expect("Seek decompress failed");

        assert_eq!(produced, cap as u64, "Should read exactly max_bytes");
        assert_eq!(out.len(), cap, "Output should be max_bytes in size");

        let begin = start as usize;
        assert_eq!(
            &out[..],
            &data[begin..begin + cap],
            "Seek with limit output mismatch"
        );
    }
}
/// Seeking exactly at every checkpoint offset must reproduce the tail of
/// the original data from that point on.
#[test]
fn test_seek_offset_equals_checkpoint() {
    let data: Vec<u8> = (0..150 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 20 * 1024)
        .expect("Failed to build index");

    for point in &index.points {
        let mut out = Vec::new();
        crate::decompress::index::seek_decompress(
            &compressed,
            &index,
            point.uncompressed_offset,
            u64::MAX,
            &mut out,
        )
        .expect("Seek decompress failed");

        let tail = &data[point.uncompressed_offset as usize..];
        assert_eq!(
            &out[..],
            tail,
            "Seek to checkpoint {} should match",
            point.uncompressed_offset
        );
    }
}
/// Seeking at the first checkpoint of a small file yields everything
/// from that offset through EOF.
#[test]
fn test_seek_from_first_checkpoint_to_end() {
    let data: Vec<u8> = (0..50 * 1024)
        .map(|i| ((i ^ (i >> 8)) % 256) as u8)
        .collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 10 * 1024)
        .expect("Failed to build index");

    if let Some(first) = index.points.first() {
        let start = first.uncompressed_offset;
        let mut out = Vec::new();
        crate::decompress::index::seek_decompress(&compressed, &index, start, u64::MAX, &mut out)
            .expect("Seek to first checkpoint failed");
        assert_eq!(
            &out[..],
            &data[start as usize..],
            "Seeking to first checkpoint should decompress rest of file"
        );
    }
}
/// An empty gzip member should index cleanly with a total size of zero.
#[test]
fn test_empty_gzip_file() {
    // Finishing without writing anything yields a valid, empty gzip stream.
    let compressed = GzEncoder::new(Vec::new(), Compression::default())
        .finish()
        .unwrap();
    let index = crate::decompress::index::build_index(&compressed, 10 * 1024)
        .expect("Failed to build index for empty file");
    assert_eq!(
        index.total_uncompressed_size, 0,
        "Empty file should have size 0"
    );
}
/// A file much smaller than the checkpoint interval must still index and
/// decompress correctly from offset zero.
#[test]
fn test_very_small_file() {
    let payload = b"hello world tiny file";
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(payload).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 1024)
        .expect("Failed to build index for small file");

    assert_eq!(
        index.total_uncompressed_size,
        payload.len() as u64,
        "Small file size should match"
    );
    assert!(
        !index.points.is_empty(),
        "Should have at least one checkpoint"
    );

    let mut out = Vec::new();
    crate::decompress::index::seek_decompress(&compressed, &index, 0, u64::MAX, &mut out)
        .expect("Seek to start of small file should succeed");
    assert_eq!(
        &out[..],
        payload,
        "Small file decompress should match original"
    );
}
/// Seeking to offset zero is equivalent to a full decompression.
#[test]
fn test_seek_to_offset_zero() {
    let data: Vec<u8> = (0..100 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 20 * 1024)
        .expect("Failed to build index");

    let mut out = Vec::new();
    crate::decompress::index::seek_decompress(&compressed, &index, 0, u64::MAX, &mut out)
        .expect("Seek to offset 0 should succeed");
    assert_eq!(
        &out[..],
        &data[..],
        "Seek to offset 0 should decompress entire file"
    );
}
/// Seeking past the end of the stream either fails or reads nothing;
/// both outcomes are acceptable.
#[test]
fn test_seek_out_of_bounds() {
    let data: Vec<u8> = (0..50 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 10 * 1024)
        .expect("Failed to build index");

    let past_eof = data.len() as u64 + 1000;
    let mut out = Vec::new();
    let result = crate::decompress::index::seek_decompress(
        &compressed,
        &index,
        past_eof,
        u64::MAX,
        &mut out,
    );

    // An error is tolerated; a success must have produced no bytes.
    if let Ok(bytes) = result {
        assert_eq!(
            bytes, 0,
            "Seeking beyond EOF should return 0 bytes if it succeeds"
        );
        assert!(
            out.is_empty(),
            "Output should be empty for out-of-bounds seek"
        );
    }
}
/// A seek landing between two checkpoints must still produce the
/// correct tail of the data.
#[test]
fn test_seek_between_checkpoints() {
    let data: Vec<u8> = (0..200 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 30 * 1024)
        .expect("Failed to build index");

    // Requires at least two checkpoints to define a midpoint.
    if let [first, second, ..] = &index.points[..] {
        let lo = first.uncompressed_offset as usize;
        let hi = second.uncompressed_offset as usize;
        let mid = lo + (hi - lo) / 2;

        let mut out = Vec::new();
        crate::decompress::index::seek_decompress(
            &compressed,
            &index,
            mid as u64,
            u64::MAX,
            &mut out,
        )
        .expect("Seek to midpoint should succeed");

        assert_eq!(
            &out[..],
            &data[mid..],
            "Seek between checkpoints should work"
        );
    }
}
/// An index whose magic bytes are wrong must be rejected on load.
#[test]
fn test_index_corrupt_magic_bytes() {
    // Otherwise well-formed header (presumed layout: magic+version,
    // interval u32, total size u64, point count u32, 2 trailing bytes)
    // but with a bogus magic string.
    let mut bogus = b"BADIDX\x01".to_vec();
    bogus.extend_from_slice(&100u32.to_le_bytes());
    bogus.extend_from_slice(&50000u64.to_le_bytes());
    bogus.extend_from_slice(&0u32.to_le_bytes());
    bogus.extend_from_slice(&[0u8; 2]);

    let result = crate::decompress::index::load_index(&bogus);
    assert!(result.is_err(), "Should reject index with bad magic bytes");
}
/// A header that ends before all required fields are present must be
/// rejected as truncated.
#[test]
fn test_index_truncated_header() {
    // Magic + version + interval only; the rest of the header is missing.
    let mut truncated = b"GZIDX\x01".to_vec();
    truncated.extend_from_slice(&100u32.to_le_bytes());

    let result = crate::decompress::index::load_index(&truncated);
    assert!(result.is_err(), "Should reject truncated index header");
}
/// A header that promises more checkpoints than the payload actually
/// carries must be rejected.
#[test]
fn test_index_truncated_points() {
    let mut bytes = b"GZIDX\x01".to_vec();
    bytes.extend_from_slice(&100u32.to_le_bytes());
    bytes.extend_from_slice(&50000u64.to_le_bytes());
    // Claim two checkpoints but supply far less data than they need.
    bytes.extend_from_slice(&2u32.to_le_bytes());
    bytes.extend_from_slice(&[0u8; 2]);
    bytes.extend_from_slice(&[0u8; 1000]);

    let result = crate::decompress::index::load_index(&bytes);
    assert!(result.is_err(), "Should reject index with truncated points");
}
/// An index carrying an unsupported format version must be rejected.
#[test]
fn test_index_invalid_version() {
    // Correct magic, but version byte 0x99 instead of the supported one.
    let mut bytes = b"GZIDX\x99".to_vec();
    bytes.extend_from_slice(&100u32.to_le_bytes());
    bytes.extend_from_slice(&50000u64.to_le_bytes());
    bytes.extend_from_slice(&0u32.to_le_bytes());
    bytes.extend_from_slice(&[0u8; 2]);

    let result = crate::decompress::index::load_index(&bytes);
    assert!(result.is_err(), "Should reject index with wrong version");
}
/// A zero checkpoint interval is an edge case: building may fail, but a
/// successful build must still contain at least one checkpoint.
#[test]
fn test_zero_interval_bytes() {
    let data: Vec<u8> = (0..10 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    // Either outcome is allowed; only a successful build is inspected.
    match crate::decompress::index::build_index(&compressed, 0) {
        Ok(idx) => assert!(
            !idx.points.is_empty(),
            "Should have at least one checkpoint"
        ),
        Err(_) => {}
    }
}
/// Checkpoints taken after data has streamed through should carry a
/// history window so seeks can resume mid-stream.
///
/// NOTE(review): the previous version computed `all_zeros` and then had
/// an empty `if !all_zeros {}` body, so it asserted nothing. It now
/// requires each non-initial checkpoint's window to contain at least one
/// non-zero byte — which must hold for this input, since the payload
/// cycles through 0..=255 (assumes windows snapshot decompressed
/// history; confirm against the index implementation).
#[test]
fn test_checkpoint_window_initialization() {
    let data: Vec<u8> = (0..100 * 1024).map(|i| (i % 256) as u8).collect();
    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&data).unwrap();
    let compressed = encoder.finish().unwrap();
    let index = crate::decompress::index::build_index(&compressed, 10 * 1024)
        .expect("Failed to build index");
    for (i, point) in index.points.iter().enumerate() {
        // The first checkpoint (offset 0) legitimately has no history.
        if i > 0 && point.uncompressed_offset > 0 {
            assert!(
                point.window.iter().any(|b| *b != 0),
                "Checkpoint {} at offset {} should have a populated history window",
                i,
                point.uncompressed_offset
            );
        }
    }
}
/// Ordering invariants across all checkpoints: uncompressed offsets
/// never decrease, compressed offsets strictly increase, and the last
/// checkpoint lies within the file.
#[test]
fn test_checkpoint_monotonicity_strict() {
    let data: Vec<u8> = (0..300 * 1024).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 50 * 1024)
        .expect("Failed to build index");

    // Uncompressed offsets are non-decreasing.
    let mut prev = 0u64;
    for (i, point) in index.points.iter().enumerate() {
        assert!(
            point.uncompressed_offset >= prev,
            "Checkpoint {} offset {} should be >= previous {}",
            i,
            point.uncompressed_offset,
            prev
        );
        prev = point.uncompressed_offset;
    }

    // Compressed positions are strictly increasing between neighbors.
    for (i, pair) in index.points.windows(2).enumerate() {
        assert!(
            pair[1].compressed_bit_offset > pair[0].compressed_bit_offset,
            "Compressed offset should strictly increase at checkpoint {}",
            i + 1
        );
    }

    if let Some(last) = index.points.last() {
        assert!(
            last.uncompressed_offset <= index.total_uncompressed_size,
            "Last checkpoint offset should not exceed total size"
        );
    }
}
/// With an interval far larger than the file there is a single useful
/// checkpoint, anchored at offset zero.
#[test]
fn test_single_checkpoint_behavior() {
    let payload = b"The quick brown fox jumps over the lazy dog. This is a test.";
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(payload).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 1024 * 1024)
        .expect("Failed to build index");

    assert!(
        !index.points.is_empty(),
        "Should have at least one checkpoint"
    );
    assert_eq!(
        index.total_uncompressed_size,
        payload.len() as u64,
        "Total size should match"
    );
    assert_eq!(
        index.points[0].uncompressed_offset, 0,
        "First checkpoint should be at offset 0"
    );

    let mut out = Vec::new();
    crate::decompress::index::seek_decompress(&compressed, &index, 0, u64::MAX, &mut out)
        .expect("Seek to 0 should work");
    assert_eq!(&out[..], payload, "Output should match original");
}
/// A 5 MiB input indexed at 512 KiB intervals: the index must record the
/// full size and keep every checkpoint within file bounds.
#[test]
fn test_large_file_many_checkpoints() {
    const SIZE: usize = 5 * 1024 * 1024;
    let data: Vec<u8> = (0..SIZE).map(|i| (i % 256) as u8).collect();
    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
    gz.write_all(&data).unwrap();
    let compressed = gz.finish().unwrap();

    let index = crate::decompress::index::build_index(&compressed, 512 * 1024)
        .expect("Failed to build index");

    assert!(
        !index.points.is_empty(),
        "Large 5MB file should have checkpoints"
    );
    assert_eq!(
        index.total_uncompressed_size,
        SIZE as u64,
        "Total size should match input"
    );

    for (i, checkpoint) in index.points.iter().enumerate() {
        assert!(
            checkpoint.uncompressed_offset <= index.total_uncompressed_size,
            "Checkpoint {} offset {} should be within file bounds",
            i,
            checkpoint.uncompressed_offset
        );
    }
}