use crate::constants::magic;
use crate::error::ShardexError;
use crate::identifiers::DocumentId;
use crate::memory::{FileHeader, MemoryMappedFile};
use bytemuck::{Pod, Zeroable};
use std::path::Path;
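/// On-disk header for a posting storage file.
///
/// The file begins with this header, followed by four parallel regions whose
/// offsets are recorded in the fields below: document IDs (16 bytes each),
/// start positions (u32), lengths (u32), and a deleted-flags bitmap with one
/// bit per posting slot.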
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(C)]
pub struct PostingStorageHeader {
pub file_header: FileHeader,
pub capacity: u32,
pub current_count: u32,
pub active_count: u32,
pub document_ids_offset: u64,
pub starts_offset: u64,
pub lengths_offset: u64,
pub deleted_flags_offset: u64,
pub document_id_size: u32,
pub reserved: [u8; 12],
}
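/// Memory-mapped, fixed-capacity storage for postings (document ID, start, length).
///
/// Postings are appended up to `capacity`; removal is a soft delete that sets a
/// bit in the deleted-flags bitmap and decrements the active count without
/// reclaiming the slot.
///
/// Illustrative usage; the surrounding crate paths are assumed, so the example
/// is marked `ignore`:
///
/// ```ignore
/// let mut storage = PostingStorage::create("postings.dat", 100)?;
/// let idx = storage.add_posting(DocumentId::new(), 0, 42)?;
/// let (doc_id, start, length) = storage.get_posting(idx)?;
/// storage.remove_posting(idx)?;
/// storage.sync()?;
/// ```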
pub struct PostingStorage {
mmap_file: MemoryMappedFile,
header: PostingStorageHeader,
capacity: usize,
read_only: bool,
}
unsafe impl Pod for PostingStorageHeader {}
unsafe impl Zeroable for PostingStorageHeader {}
const POSTING_STORAGE_VERSION: u32 = 1;
impl PostingStorageHeader {
pub const SIZE: usize = std::mem::size_of::<PostingStorageHeader>();
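/// Builds a header for `capacity` posting slots, laying out the parallel
/// regions immediately after the header: document IDs first, then start
/// positions, lengths, and finally the deleted-flags bitmap.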
pub fn new(capacity: usize) -> Result<Self, ShardexError> {
if capacity == 0 {
return Err(ShardexError::Config("Capacity cannot be zero".to_string()));
}
if capacity > u32::MAX as usize {
return Err(ShardexError::Config(format!(
"Capacity {} exceeds maximum {}",
capacity,
u32::MAX
)));
}
let header_size = Self::SIZE;
let document_ids_offset = header_size as u64;
let document_ids_size = capacity * 16;
let starts_offset = document_ids_offset + document_ids_size as u64;
let starts_size = capacity * 4;
let lengths_offset = starts_offset + starts_size as u64;
let lengths_size = capacity * 4;
let deleted_flags_offset = lengths_offset + lengths_size as u64;
Ok(Self {
file_header: FileHeader::new_without_checksum(
magic::POSTING_STORAGE,
POSTING_STORAGE_VERSION,
FileHeader::SIZE as u64,
),
capacity: capacity as u32,
current_count: 0,
active_count: 0,
document_ids_offset,
starts_offset,
lengths_offset,
deleted_flags_offset,
document_id_size: 16,
reserved: [0; 12],
})
}
pub fn validate(&self) -> Result<(), ShardexError> {
self.file_header.validate_magic(magic::POSTING_STORAGE)?;
if self.file_header.version != POSTING_STORAGE_VERSION {
return Err(ShardexError::Corruption(format!(
"Unsupported posting storage version: expected {}, found {}",
POSTING_STORAGE_VERSION, self.file_header.version
)));
}
if self.document_id_size != 16 {
return Err(ShardexError::Corruption(format!(
"Invalid document ID size: expected 16 bytes, found {}",
self.document_id_size
)));
}
if self.current_count > self.capacity {
return Err(ShardexError::Corruption(format!(
"Current count {} exceeds capacity {}",
self.current_count, self.capacity
)));
}
if self.active_count > self.current_count {
return Err(ShardexError::Corruption(format!(
"Active count {} exceeds current count {}",
self.active_count, self.current_count
)));
}
Ok(())
}
pub fn update_checksum(&mut self, posting_data: &[u8]) {
self.file_header.update_checksum(posting_data);
}
pub fn calculate_file_size(&self) -> usize {
let capacity = self.capacity as usize;
let document_ids_size = capacity * 16;
let starts_size = capacity * 4;
let lengths_size = capacity * 4;
let deleted_flags_size = (capacity + 7) / 8;
Self::SIZE + document_ids_size + starts_size + lengths_size + deleted_flags_size
}
}
impl PostingStorage {
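/// Creates a new posting storage file at `path` with room for `capacity`
/// postings, zero-fills the data region, and writes the initial header and
/// checksum before syncing to disk.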
pub fn create<P: AsRef<Path>>(path: P, capacity: usize) -> Result<Self, ShardexError> {
let path = path.as_ref();
let mut header = PostingStorageHeader::new(capacity)?;
let total_size = header.calculate_file_size();
let mut mmap_file = MemoryMappedFile::create(path, total_size)?;
let data_size = total_size - PostingStorageHeader::SIZE;
let zero_data = vec![0u8; data_size];
header.update_checksum(&zero_data);
mmap_file.write_at(0, &header)?;
mmap_file.write_slice_at(PostingStorageHeader::SIZE, &zero_data)?;
mmap_file.sync()?;
Ok(Self {
mmap_file,
header,
capacity,
read_only: false,
})
}
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, ShardexError> {
Self::open_with_mode(path, false)
}
pub fn open_read_only<P: AsRef<Path>>(path: P) -> Result<Self, ShardexError> {
Self::open_with_mode(path, true)
}
fn open_with_mode<P: AsRef<Path>>(path: P, read_only: bool) -> Result<Self, ShardexError> {
let path = path.as_ref();
let mmap_file = if read_only {
MemoryMappedFile::open_read_only(path)?
} else {
MemoryMappedFile::open_read_write(path)?
};
let header: PostingStorageHeader = mmap_file.read_at(0)?;
header.validate()?;
let expected_size = header.calculate_file_size();
if mmap_file.len() < expected_size {
return Err(ShardexError::Corruption(
"File too small for declared posting capacity".to_string(),
));
}
let data_start = PostingStorageHeader::SIZE;
let data_size = expected_size - PostingStorageHeader::SIZE;
let data = &mmap_file.as_slice()[data_start..data_start + data_size];
header.file_header.validate_checksum(data)?;
let capacity = header.capacity as usize;
Ok(Self {
mmap_file,
header,
capacity,
read_only,
})
}
pub fn capacity(&self) -> usize {
self.capacity
}
pub fn current_count(&self) -> usize {
self.header.current_count as usize
}
pub fn active_count(&self) -> usize {
self.header.active_count as usize
}
pub fn is_read_only(&self) -> bool {
self.read_only
}
pub fn is_full(&self) -> bool {
self.current_count() >= self.capacity()
}
pub fn remaining_capacity(&self) -> usize {
self.capacity().saturating_sub(self.current_count())
}
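/// Appends a posting and returns its slot index.
///
/// Fails if the storage is read-only or already at capacity. Both the current
/// and active counts are incremented and the header checksum is refreshed.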
pub fn add_posting(&mut self, document_id: DocumentId, start: u32, length: u32) -> Result<usize, ShardexError> {
if self.read_only {
return Err(ShardexError::Config(
"Cannot add posting to read-only storage".to_string(),
));
}
if self.is_full() {
return Err(ShardexError::Config("Posting storage is at capacity".to_string()));
}
let index = self.current_count();
self.write_posting_at_index(index, document_id, start, length)?;
self.set_deleted_flag(index, false)?;
self.header.current_count += 1;
self.header.active_count += 1;
self.update_header()?;
Ok(index)
}
pub fn get_posting(&self, index: usize) -> Result<(DocumentId, u32, u32), ShardexError> {
if index >= self.current_count() {
return Err(ShardexError::Config(format!(
"Index {} out of bounds (current count: {})",
index,
self.current_count()
)));
}
self.read_posting_at_index(index)
}
pub fn update_posting(
&mut self,
index: usize,
document_id: DocumentId,
start: u32,
length: u32,
) -> Result<(), ShardexError> {
if self.read_only {
return Err(ShardexError::Config(
"Cannot update posting in read-only storage".to_string(),
));
}
if index >= self.current_count() {
return Err(ShardexError::Config(format!(
"Index {} out of bounds (current count: {})",
index,
self.current_count()
)));
}
self.write_posting_at_index(index, document_id, start, length)?;
self.update_header()
}
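/// Soft-deletes the posting at `index` by setting its bit in the deleted-flags
/// bitmap. The slot is not reclaimed: `current_count` is unchanged while
/// `active_count` decreases.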
pub fn remove_posting(&mut self, index: usize) -> Result<(), ShardexError> {
if self.read_only {
return Err(ShardexError::Config(
"Cannot remove posting from read-only storage".to_string(),
));
}
if index >= self.current_count() {
return Err(ShardexError::Config(format!(
"Index {} out of bounds (current count: {})",
index,
self.current_count()
)));
}
self.set_deleted_flag(index, true)?;
if self.header.active_count > 0 {
self.header.active_count -= 1;
}
self.update_header()
}
pub fn is_deleted(&self, index: usize) -> Result<bool, ShardexError> {
if index >= self.current_count() {
return Ok(false);
}
self.get_deleted_flag(index)
}
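/// Returns the indices of all active (non-deleted) postings for `document_id`,
/// using a linear scan over the occupied slots.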
pub fn find_by_document_id(&self, document_id: DocumentId) -> Result<Vec<usize>, ShardexError> {
let mut indices = Vec::new();
for i in 0..self.current_count() {
if !self.is_deleted(i)? {
let (doc_id, _, _) = self.get_posting(i)?;
if doc_id == document_id {
indices.push(i);
}
}
}
Ok(indices)
}
pub fn iter_active(&self) -> impl Iterator<Item = Result<(usize, DocumentId, u32, u32), ShardexError>> + '_ {
(0..self.current_count()).filter_map(move |index| {
match self.is_deleted(index) {
Ok(true) => None,
Ok(false) => match self.get_posting(index) {
Ok((doc_id, start, length)) => Some(Ok((index, doc_id, start, length))),
Err(e) => Some(Err(e)),
},
Err(e) => Some(Err(e)),
}
})
}
pub fn iter_backward(&self) -> impl Iterator<Item = Result<(usize, DocumentId, u32, u32), ShardexError>> + '_ {
(0..self.current_count())
.rev()
.map(move |index| match self.get_posting(index) {
Ok((doc_id, start, length)) => Ok((index, doc_id, start, length)),
Err(e) => Err(e),
})
}
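/// Iterates newest-first, yielding each distinct (document ID, start, length)
/// combination once. Deleted slots are not filtered here, so the most recent
/// occurrence of a key wins even if it has been tombstoned.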
pub fn iter_unique_backward(
&self,
) -> impl Iterator<Item = Result<(usize, DocumentId, u32, u32), ShardexError>> + '_ {
use std::collections::HashSet;
let mut seen = HashSet::new();
self.iter_backward().filter_map(move |result| {
match result {
Ok((index, doc_id, start, length)) => {
let key = (doc_id, start, length);
if seen.insert(key) {
Some(Ok((index, doc_id, start, length)))
} else {
None
}
}
Err(e) => Some(Err(e)),
}
})
}
pub fn sync(&mut self) -> Result<(), ShardexError> {
if self.read_only {
return Ok(());
}
self.update_header()?;
self.mmap_file.sync()
}
fn write_posting_at_index(
&mut self,
index: usize,
document_id: DocumentId,
start: u32,
length: u32,
) -> Result<(), ShardexError> {
let doc_id_offset = self.header.document_ids_offset as usize + (index * 16);
self.mmap_file.write_at(doc_id_offset, &document_id)?;
let start_offset = self.header.starts_offset as usize + (index * 4);
self.mmap_file.write_at(start_offset, &start)?;
let length_offset = self.header.lengths_offset as usize + (index * 4);
self.mmap_file.write_at(length_offset, &length)?;
Ok(())
}
fn read_posting_at_index(&self, index: usize) -> Result<(DocumentId, u32, u32), ShardexError> {
let doc_id_offset = self.header.document_ids_offset as usize + (index * 16);
let document_id: DocumentId = self.mmap_file.read_at(doc_id_offset)?;
let start_offset = self.header.starts_offset as usize + (index * 4);
let start: u32 = self.mmap_file.read_at(start_offset)?;
let length_offset = self.header.lengths_offset as usize + (index * 4);
let length: u32 = self.mmap_file.read_at(length_offset)?;
Ok((document_id, start, length))
}
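/// Sets or clears the deleted bit for `index` in the bitmap: posting `index`
/// lives in byte `index / 8` at bit `index % 8` (for example, index 10 maps to
/// byte 1, bit 2).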
fn set_deleted_flag(&mut self, index: usize, deleted: bool) -> Result<(), ShardexError> {
let byte_index = index / 8;
let bit_index = index % 8;
let byte_offset = self.header.deleted_flags_offset as usize + byte_index;
let mut current_byte: u8 = self.mmap_file.read_at(byte_offset)?;
if deleted {
current_byte |= 1 << bit_index;
} else {
current_byte &= !(1 << bit_index);
}
self.mmap_file.write_at(byte_offset, &current_byte)?;
Ok(())
}
fn get_deleted_flag(&self, index: usize) -> Result<bool, ShardexError> {
let byte_index = index / 8;
let bit_index = index % 8;
let byte_offset = self.header.deleted_flags_offset as usize + byte_index;
let current_byte: u8 = self.mmap_file.read_at(byte_offset)?;
Ok((current_byte & (1 << bit_index)) != 0)
}
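/// Verifies the header fields, the data-region checksum, and the consistency
/// of the stored postings (active count, position ranges, document IDs).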
pub fn validate_integrity(&self) -> Result<(), ShardexError> {
self.header.validate()?;
let data_start = PostingStorageHeader::SIZE;
let data_size = self.header.calculate_file_size() - PostingStorageHeader::SIZE;
if data_start + data_size > self.mmap_file.len() {
return Err(ShardexError::Corruption(
"File size is inconsistent with header metadata".to_string(),
));
}
let data = &self.mmap_file.as_slice()[data_start..data_start + data_size];
self.header.file_header.validate_checksum(data)?;
self.validate_data_consistency()?;
Ok(())
}
fn validate_data_consistency(&self) -> Result<(), ShardexError> {
let mut actual_active_count = 0u32;
for i in 0..self.current_count() {
if !self.is_deleted(i)? {
actual_active_count += 1;
}
}
if actual_active_count != self.header.active_count {
return Err(ShardexError::Corruption(format!(
"Active count mismatch: header claims {}, actual count is {}",
self.header.active_count, actual_active_count
)));
}
for i in 0..self.current_count() {
let (doc_id, start, length) = self.read_posting_at_index(i)?;
if length > u32::MAX / 2 {
return Err(ShardexError::Corruption(format!(
"Posting {} has unreasonable length: {}",
i, length
)));
}
if let Some(end_pos) = start.checked_add(length) {
if end_pos < start {
return Err(ShardexError::Corruption(format!(
"Posting {} has invalid range: start={}, length={}",
i, start, length
)));
}
} else {
return Err(ShardexError::Corruption(format!(
"Posting {} position overflow: start={}, length={}",
i, start, length
)));
}
if doc_id.raw() == 0 && !self.is_deleted(i)? {
return Err(ShardexError::Corruption(format!(
"Active posting {} has invalid zero document ID",
i
)));
}
}
Ok(())
}
pub fn memory_mapped_file(&self) -> &MemoryMappedFile {
&self.mmap_file
}
fn update_header(&mut self) -> Result<(), ShardexError> {
let data_start = PostingStorageHeader::SIZE;
let data_size = self.header.calculate_file_size() - PostingStorageHeader::SIZE;
let data = &self.mmap_file.as_slice()[data_start..data_start + data_size];
self.header.file_header.update_checksum(data);
self.mmap_file.write_at(0, &self.header)?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::{NamedTempFile, TempDir};
#[test]
fn test_posting_storage_header_creation() {
let header = PostingStorageHeader::new(1000).unwrap();
assert_eq!(header.capacity, 1000);
assert_eq!(header.current_count, 0);
assert_eq!(header.active_count, 0);
assert_eq!(header.document_id_size, 16);
assert!(header.validate().is_ok());
}
#[test]
fn test_posting_storage_header_validation_errors() {
assert!(PostingStorageHeader::new(0).is_err());
assert!(PostingStorageHeader::new(u32::MAX as usize + 1).is_err());
}
#[test]
fn test_create_posting_storage() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let storage = PostingStorage::create(&storage_path, 100).unwrap();
assert_eq!(storage.capacity(), 100);
assert_eq!(storage.current_count(), 0);
assert_eq!(storage.active_count(), 0);
assert!(!storage.is_read_only());
assert!(!storage.is_full());
assert_eq!(storage.remaining_capacity(), 100);
}
#[test]
fn test_add_and_get_postings() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let mut storage = PostingStorage::create(&storage_path, 10).unwrap();
let doc_id1 = DocumentId::new();
let doc_id2 = DocumentId::new();
let idx1 = storage.add_posting(doc_id1, 100, 50).unwrap();
let idx2 = storage.add_posting(doc_id2, 200, 75).unwrap();
assert_eq!(idx1, 0);
assert_eq!(idx2, 1);
assert_eq!(storage.current_count(), 2);
assert_eq!(storage.active_count(), 2);
let (retrieved_doc_id1, start1, length1) = storage.get_posting(idx1).unwrap();
let (retrieved_doc_id2, start2, length2) = storage.get_posting(idx2).unwrap();
assert_eq!(retrieved_doc_id1, doc_id1);
assert_eq!(start1, 100);
assert_eq!(length1, 50);
assert_eq!(retrieved_doc_id2, doc_id2);
assert_eq!(start2, 200);
assert_eq!(length2, 75);
}
#[test]
fn test_capacity_limits() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let mut storage = PostingStorage::create(&storage_path, 2).unwrap();
let doc_id = DocumentId::new();
storage.add_posting(doc_id, 100, 50).unwrap();
storage.add_posting(doc_id, 200, 75).unwrap();
assert!(storage.is_full());
assert_eq!(storage.remaining_capacity(), 0);
let result = storage.add_posting(doc_id, 300, 25);
assert!(matches!(result, Err(ShardexError::Config(_))));
}
#[test]
fn test_update_posting() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let mut storage = PostingStorage::create(&storage_path, 10).unwrap();
let doc_id1 = DocumentId::new();
let doc_id2 = DocumentId::new();
let idx = storage.add_posting(doc_id1, 100, 50).unwrap();
storage.update_posting(idx, doc_id2, 200, 75).unwrap();
let (retrieved_doc_id, start, length) = storage.get_posting(idx).unwrap();
assert_eq!(retrieved_doc_id, doc_id2);
assert_eq!(start, 200);
assert_eq!(length, 75);
}
#[test]
fn test_remove_posting() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let mut storage = PostingStorage::create(&storage_path, 10).unwrap();
let doc_id1 = DocumentId::new();
let doc_id2 = DocumentId::new();
let idx1 = storage.add_posting(doc_id1, 100, 50).unwrap();
let idx2 = storage.add_posting(doc_id2, 200, 75).unwrap();
assert_eq!(storage.active_count(), 2);
storage.remove_posting(idx1).unwrap();
assert_eq!(storage.current_count(), 2);
assert_eq!(storage.active_count(), 1);
assert!(storage.is_deleted(idx1).unwrap());
assert!(!storage.is_deleted(idx2).unwrap());
let (retrieved_doc_id2, start2, length2) = storage.get_posting(idx2).unwrap();
assert_eq!(retrieved_doc_id2, doc_id2);
assert_eq!(start2, 200);
assert_eq!(length2, 75);
}
#[test]
fn test_find_by_document_id() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let mut storage = PostingStorage::create(&storage_path, 10).unwrap();
let doc_id1 = DocumentId::new();
let doc_id2 = DocumentId::new();
let idx1 = storage.add_posting(doc_id1, 100, 50).unwrap();
let idx2 = storage.add_posting(doc_id2, 200, 75).unwrap();
let idx3 = storage.add_posting(doc_id1, 300, 25).unwrap();
let indices1 = storage.find_by_document_id(doc_id1).unwrap();
let indices2 = storage.find_by_document_id(doc_id2).unwrap();
assert_eq!(indices1, vec![idx1, idx3]);
assert_eq!(indices2, vec![idx2]);
storage.remove_posting(idx3).unwrap();
let indices1_after_removal = storage.find_by_document_id(doc_id1).unwrap();
assert_eq!(indices1_after_removal, vec![idx1]);
}
#[test]
fn test_iter_active() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let mut storage = PostingStorage::create(&storage_path, 10).unwrap();
let doc_id1 = DocumentId::new();
let doc_id2 = DocumentId::new();
let doc_id3 = DocumentId::new();
let idx1 = storage.add_posting(doc_id1, 100, 50).unwrap();
let idx2 = storage.add_posting(doc_id2, 200, 75).unwrap();
let idx3 = storage.add_posting(doc_id3, 300, 25).unwrap();
storage.remove_posting(idx2).unwrap();
let active_postings: Result<Vec<_>, _> = storage.iter_active().collect();
let active_postings = active_postings.unwrap();
assert_eq!(active_postings.len(), 2);
let indices: Vec<usize> = active_postings.iter().map(|(idx, _, _, _)| *idx).collect();
assert!(indices.contains(&idx1));
assert!(indices.contains(&idx3));
assert!(!indices.contains(&idx2));
}
#[test]
fn test_out_of_bounds_access() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let storage = PostingStorage::create(&storage_path, 10).unwrap();
let result = storage.get_posting(0);
assert!(matches!(result, Err(ShardexError::Config(_))));
let result = storage.get_posting(5);
assert!(matches!(result, Err(ShardexError::Config(_))));
}
#[test]
fn test_persistence() {
let temp_file = NamedTempFile::new().unwrap();
let storage_path = temp_file.path();
let postings_to_add = vec![
(DocumentId::new(), 100, 50),
(DocumentId::new(), 200, 75),
(DocumentId::new(), 300, 25),
];
{
let mut storage = PostingStorage::create(storage_path, 10).unwrap();
for (doc_id, start, length) in &postings_to_add {
storage.add_posting(*doc_id, *start, *length).unwrap();
}
storage.sync().unwrap();
}
{
let storage = PostingStorage::open(storage_path).unwrap();
assert_eq!(storage.capacity(), 10);
assert_eq!(storage.current_count(), 3);
assert_eq!(storage.active_count(), 3);
for (i, (expected_doc_id, expected_start, expected_length)) in postings_to_add.iter().enumerate() {
let (retrieved_doc_id, start, length) = storage.get_posting(i).unwrap();
assert_eq!(retrieved_doc_id, *expected_doc_id);
assert_eq!(start, *expected_start);
assert_eq!(length, *expected_length);
}
}
}
#[test]
fn test_read_only_mode() {
let temp_file = NamedTempFile::new().unwrap();
let storage_path = temp_file.path();
{
let mut storage = PostingStorage::create(storage_path, 5).unwrap();
let doc_id = DocumentId::new();
storage.add_posting(doc_id, 100, 50).unwrap();
storage.sync().unwrap();
}
{
let mut storage = PostingStorage::open_read_only(storage_path).unwrap();
assert!(storage.is_read_only());
assert_eq!(storage.current_count(), 1);
let (_retrieved_doc_id, start, length) = storage.get_posting(0).unwrap();
assert_eq!(start, 100);
assert_eq!(length, 50);
let new_doc_id = DocumentId::new();
assert!(storage.add_posting(new_doc_id, 200, 75).is_err());
assert!(storage.update_posting(0, new_doc_id, 200, 75).is_err());
assert!(storage.remove_posting(0).is_err());
}
}
#[test]
fn test_header_bytemuck_compatibility() {
let header = PostingStorageHeader::new(1000).unwrap();
let bytes = bytemuck::bytes_of(&header);
assert_eq!(bytes.len(), PostingStorageHeader::SIZE);
let header_restored = bytemuck::from_bytes::<PostingStorageHeader>(bytes);
assert_eq!(header.capacity, header_restored.capacity);
assert_eq!(header.current_count, header_restored.current_count);
assert_eq!(header.active_count, header_restored.active_count);
}
#[test]
fn test_header_validation() {
let mut header = PostingStorageHeader::new(1000).unwrap();
assert!(header.validate().is_ok());
header.file_header.magic = *magic::TEST_CORRUPTION;
assert!(header.validate().is_err());
header.file_header.magic = *magic::POSTING_STORAGE;
header.file_header.version = 999;
assert!(header.validate().is_err());
header.file_header.version = POSTING_STORAGE_VERSION;
header.document_id_size = 8;
assert!(header.validate().is_err());
header.document_id_size = 16;
header.current_count = header.capacity + 1;
assert!(header.validate().is_err());
header.current_count = 0;
header.active_count = header.current_count + 1;
assert!(header.validate().is_err());
header.active_count = 0;
assert!(header.validate().is_ok());
}
#[test]
fn test_deleted_flag_operations() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let mut storage = PostingStorage::create(&storage_path, 100).unwrap();
let doc_ids: Vec<DocumentId> = (0..10).map(|_| DocumentId::new()).collect();
let mut indices = Vec::new();
for doc_id in &doc_ids {
let idx = storage.add_posting(*doc_id, 100, 50).unwrap();
indices.push(idx);
}
for idx in &indices {
assert!(!storage.is_deleted(*idx).unwrap());
}
storage.remove_posting(indices[1]).unwrap();
storage.remove_posting(indices[5]).unwrap();
storage.remove_posting(indices[8]).unwrap();
for (i, idx) in indices.iter().enumerate() {
let expected_deleted = i == 1 || i == 5 || i == 8;
assert_eq!(storage.is_deleted(*idx).unwrap(), expected_deleted);
}
assert_eq!(storage.active_count(), 7);
}
#[test]
fn test_file_size_calculation() {
let header = PostingStorageHeader::new(1000).unwrap();
let expected_size = PostingStorageHeader::SIZE +
(1000 * 16) + (1000 * 4) + (1000 * 4) + 1000_usize.div_ceil(8);
assert_eq!(header.calculate_file_size(), expected_size);
}
#[test]
fn test_bit_manipulation() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("bits.dat");
let mut storage = PostingStorage::create(&storage_path, 64).unwrap();
for i in 0..64 {
let doc_id = DocumentId::new();
storage.add_posting(doc_id, i as u32 * 100, 50).unwrap();
}
let test_indices = [0, 1, 7, 8, 15, 16, 31, 32, 63];
for &idx in &test_indices {
storage.remove_posting(idx).unwrap();
assert!(storage.is_deleted(idx).unwrap());
}
for i in 0..64 {
if !test_indices.contains(&i) {
assert!(!storage.is_deleted(i).unwrap());
}
}
}
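// Sketch of a regression test for iter_unique_backward: the newest occurrence
// of a duplicated (document ID, start, length) key should be yielded exactly
// once, at its highest index.
#[test]
fn test_iter_unique_backward_deduplicates() {
let temp_dir = TempDir::new().unwrap();
let storage_path = temp_dir.path().join("postings.dat");
let mut storage = PostingStorage::create(&storage_path, 10).unwrap();
let doc_id1 = DocumentId::new();
let doc_id2 = DocumentId::new();
let idx1 = storage.add_posting(doc_id1, 100, 50).unwrap();
let idx2 = storage.add_posting(doc_id2, 200, 75).unwrap();
// Same key as idx1; only this newer copy should be reported.
let idx3 = storage.add_posting(doc_id1, 100, 50).unwrap();
let unique: Result<Vec<_>, _> = storage.iter_unique_backward().collect();
let unique = unique.unwrap();
assert_eq!(unique.len(), 2);
assert_eq!(unique[0], (idx3, doc_id1, 100, 50));
assert_eq!(unique[1], (idx2, doc_id2, 200, 75));
assert!(!unique.iter().any(|entry| entry.0 == idx1));
}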
}