use super::cache::BlockId;
use super::config::CompressionAlgorithm;
use super::stats::CompressionMetadata;
use std::marker::PhantomData;
/// A matrix stored as a collection of individually compressed blocks.
///
/// `T` appears only in a `PhantomData` marker; no value of type `T` is stored
/// here (presumably it names the element type of the source matrix — confirm
/// against callers).
#[derive(Debug)]
pub struct CompressedMatrix<T> {
/// Identifier of this matrix, as passed to `new`.
pub matrixid: u64,
/// Row count recorded for the matrix before compression.
pub original_rows: usize,
/// Column count recorded for the matrix before compression.
pub original_cols: usize,
/// Stored blocks, in insertion order unless re-sorted by `optimize_blocks`.
pub compressed_blocks: Vec<CompressedBlock>,
/// Algorithm tag recorded for this matrix.
pub compression_algorithm: CompressionAlgorithm,
/// Block size passed to `new`; the unit (elements vs. bytes) is not evident
/// from this file.
pub block_size: usize,
/// Aggregate size statistics, rebuilt by `add_block` and `remove_block`.
/// NOTE: mutating `compressed_blocks` directly does not refresh this field.
pub metadata: CompressionMetadata,
/// Zero-sized marker tying the struct to `T`.
_phantom: PhantomData<T>,
}
/// One compressed chunk of a matrix together with its bookkeeping data.
#[derive(Debug, Clone)]
pub struct CompressedBlock {
/// Identifier of the block within its matrix.
pub blockid: BlockId,
/// Kind of payload held in `compressed_data`.
pub block_type: BlockType,
/// The compressed bytes; their length is what this file reports as the
/// compressed size.
pub compressed_data: Vec<u8>,
/// Size of the payload before compression, as reported by the creator.
pub original_size: usize,
/// Compression level recorded at construction; it is only stored here, never
/// interpreted in this file.
pub compression_level: u8,
/// Hash of `compressed_data` when created through `new`; `None` when created
/// through `new_unchecked`, in which case integrity checks are skipped.
pub checksum: Option<u64>,
/// Creation time in whole seconds since the Unix epoch.
pub timestamp: u64,
}
/// Kind of payload a `CompressedBlock` carries.
///
/// NOTE(review): the variant names suggest the arrays of a CSR/CSC sparse
/// layout (index pointers, indices, values) — confirm against the producer.
/// Variant order matters: `BlockHeader::new` stores the variant as `as u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BlockType {
/// Index-pointer payload (textual name `"indptr"`).
IndPtr,
/// Index payload (textual name `"indices"`).
Indices,
/// Value payload (textual name `"data"`); this is the `Default` variant.
#[default]
Data,
/// Combined payload (textual name `"combined"`); what is combined is not
/// shown in this file.
Combined,
/// Metadata payload (textual name `"metadata"`).
Metadata,
}
/// Crate-internal, in-memory description of a block that can be converted to
/// and from a fixed-size byte representation.
#[derive(Debug)]
#[allow(dead_code)]
pub(crate) struct BlockHeader {
/// Identifier of the described block.
pub blockid: BlockId,
/// `BlockType` stored as its `as u8` discriminant.
pub block_type: u8,
/// Payload size before compression.
pub original_size: usize,
/// Payload size after compression.
pub compressed_size: usize,
/// Compression level recorded for the block.
pub compression_level: u8,
/// Checksum value; `BlockHeader::new` initialises it to `0`.
pub checksum: u64,
/// Creation time in whole seconds since the Unix epoch.
pub timestamp: u64,
}
/// Fixed-layout (`#[repr(C)]`) mirror of `BlockHeader` built only from
/// fixed-width integer fields.
///
/// `BlockHeader::serialize` emits this struct's in-memory bytes, so the encoded
/// form follows the host's C layout and byte order, and `BlockHeader::size`
/// reports `size_of` of this type.
///
/// NOTE(review): `#[repr(C)]` alignment rules can still insert implicit padding
/// after the single-byte fields and at the end of the struct, in addition to
/// the explicit `padding` array.
#[repr(C)]
#[allow(dead_code)]
pub(crate) struct BlockHeaderSerialized {
/// `BlockId` packed through `BlockId::to_u64`.
pub blockid: u64,
/// `BlockType` discriminant.
pub block_type: u8,
/// Payload size before compression, widened to 64 bits.
pub original_size: u64,
/// Payload size after compression, widened to 64 bits.
pub compressed_size: u64,
/// Compression level recorded for the block.
pub compression_level: u8,
/// Checksum value copied from the header.
pub checksum: u64,
/// Creation time in whole seconds since the Unix epoch.
pub timestamp: u64,
/// Explicit filler bytes; written as zeros by `BlockHeader::serialize`.
pub padding: [u8; 3], }
impl<T> CompressedMatrix<T> {
pub fn new(
matrix_id: u64,
original_rows: usize,
original_cols: usize,
compression_algorithm: CompressionAlgorithm,
block_size: usize,
) -> Self {
Self {
matrixid: matrix_id,
original_rows,
original_cols,
compressed_blocks: Vec::new(),
compression_algorithm,
block_size,
metadata: CompressionMetadata::new(0, 0, 0.0),
_phantom: PhantomData,
}
}
pub fn add_block(&mut self, block: CompressedBlock) {
self.compressed_blocks.push(block);
self.update_metadata();
}
pub fn get_block(&self, block_id: &BlockId) -> Option<&CompressedBlock> {
self.compressed_blocks
.iter()
.find(|block| &block.blockid == block_id)
}
pub fn get_block_mut(&mut self, block_id: &BlockId) -> Option<&mut CompressedBlock> {
self.compressed_blocks
.iter_mut()
.find(|block| &block.blockid == block_id)
}
pub fn remove_block(&mut self, block_id: &BlockId) -> Option<CompressedBlock> {
if let Some(pos) = self
.compressed_blocks
.iter()
.position(|block| &block.blockid == block_id)
{
let removed = self.compressed_blocks.remove(pos);
self.update_metadata();
Some(removed)
} else {
None
}
}
pub fn get_blocks_by_type(&self, block_type: BlockType) -> Vec<&CompressedBlock> {
self.compressed_blocks
.iter()
.filter(|block| block.block_type == block_type)
.collect()
}
fn update_metadata(&mut self) {
let total_original_size: usize = self
.compressed_blocks
.iter()
.map(|block| block.original_size)
.sum();
let total_compressed_size: usize = self
.compressed_blocks
.iter()
.map(|block| block.compressed_data.len())
.sum();
self.metadata = CompressionMetadata::new(
total_original_size,
total_compressed_size,
0.0, );
}
pub fn block_count(&self) -> usize {
self.compressed_blocks.len()
}
pub fn compressed_size(&self) -> usize {
self.compressed_blocks
.iter()
.map(|block| block.compressed_data.len())
.sum()
}
pub fn original_size(&self) -> usize {
self.compressed_blocks
.iter()
.map(|block| block.original_size)
.sum()
}
pub fn compression_ratio(&self) -> f64 {
self.metadata.compression_ratio
}
pub fn verify_integrity(&self) -> Result<(), String> {
for block in &self.compressed_blocks {
if let Some(expected_checksum) = block.checksum {
let actual_checksum = Self::calculate_checksum(&block.compressed_data);
if actual_checksum != expected_checksum {
return Err(format!("Checksum mismatch for block {}", block.blockid));
}
}
}
Ok(())
}
fn calculate_checksum(data: &[u8]) -> u64 {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
data.hash(&mut hasher);
hasher.finish()
}
pub fn memory_footprint(&self) -> usize {
std::mem::size_of::<Self>()
+ self
.compressed_blocks
.iter()
.map(|block| block.memory_footprint())
.sum::<usize>()
}
pub fn optimize_blocks(&mut self) {
self.compressed_blocks.sort_by(|a, b| {
a.blockid
.block_row
.cmp(&b.blockid.block_row)
.then_with(|| a.blockid.block_col.cmp(&b.blockid.block_col))
});
}
pub fn get_blocks_row_major(&self) -> Vec<&CompressedBlock> {
let mut blocks = self.compressed_blocks.iter().collect::<Vec<_>>();
blocks.sort_by(|a, b| {
a.blockid
.block_row
.cmp(&b.blockid.block_row)
.then_with(|| a.blockid.block_col.cmp(&b.blockid.block_col))
});
blocks
}
pub fn export_metadata(&self) -> MatrixMetadataExport {
MatrixMetadataExport {
matrix_id: self.matrixid,
original_rows: self.original_rows,
original_cols: self.original_cols,
block_count: self.compressed_blocks.len(),
compression_algorithm: self.compression_algorithm,
block_size: self.block_size,
total_original_size: self.original_size(),
total_compressed_size: self.compressed_size(),
compression_ratio: self.compression_ratio(),
block_map: self
.compressed_blocks
.iter()
.map(|block| (block.blockid.clone(), block.block_type))
.collect(),
}
}
}
/// Constructors, integrity helpers and size reporting for `CompressedBlock`.
impl CompressedBlock {
    /// Creates a block whose checksum is computed from `compressed_data`.
    ///
    /// The timestamp is set to the current time in seconds since the Unix epoch.
    pub fn new(
        block_id: BlockId,
        block_type: BlockType,
        compressed_data: Vec<u8>,
        original_size: usize,
        compression_level: u8,
    ) -> Self {
        // Hash first (borrowing the payload), then move the payload into the block.
        let checksum = Some(Self::calculate_checksum(&compressed_data));
        Self {
            checksum,
            ..Self::new_unchecked(
                block_id,
                block_type,
                compressed_data,
                original_size,
                compression_level,
            )
        }
    }

    /// Creates a block without a checksum; integrity checks will pass trivially.
    pub fn new_unchecked(
        block_id: BlockId,
        block_type: BlockType,
        compressed_data: Vec<u8>,
        original_size: usize,
        compression_level: u8,
    ) -> Self {
        let timestamp = Self::current_timestamp();
        Self {
            blockid: block_id,
            block_type,
            compressed_data,
            original_size,
            compression_level,
            checksum: None,
            timestamp,
        }
    }

    /// Compressed length divided by original size; `1.0` when the original
    /// size is zero.
    pub fn compression_ratio(&self) -> f64 {
        match self.original_size {
            0 => 1.0,
            original => self.compressed_data.len() as f64 / original as f64,
        }
    }

    /// Bytes saved by compression, clamped at zero when the payload grew.
    pub fn space_savings(&self) -> usize {
        let compressed = self.compressed_data.len();
        self.original_size.saturating_sub(compressed)
    }

    /// Returns `true` when the stored checksum matches the payload, or when
    /// no checksum is stored at all.
    pub fn verify_integrity(&self) -> bool {
        match self.checksum {
            None => true,
            Some(expected) => Self::calculate_checksum(&self.compressed_data) == expected,
        }
    }

    /// Recomputes the checksum from the current payload and stores it.
    pub fn update_checksum(&mut self) {
        let fresh = Self::calculate_checksum(&self.compressed_data);
        self.checksum = Some(fresh);
    }

    /// Hashes `data` with the standard library's `DefaultHasher`.
    ///
    /// The std documentation does not guarantee this algorithm to be stable
    /// across Rust releases, so the value is only meaningful in-process.
    fn calculate_checksum(data: &[u8]) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut state = DefaultHasher::new();
        data.hash(&mut state);
        state.finish()
    }

    /// Current time in whole seconds since the Unix epoch; `0` if the system
    /// clock reports a time before the epoch.
    fn current_timestamp() -> u64 {
        use std::time::{SystemTime, UNIX_EPOCH};

        match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_secs(),
            Err(_) => 0,
        }
    }

    /// Size of the struct plus the payload length, in bytes (spare vector
    /// capacity is not counted).
    pub fn memory_footprint(&self) -> usize {
        let payload = self.compressed_data.len();
        std::mem::size_of::<Self>() + payload
    }

    /// Seconds elapsed since the block's timestamp, clamped at zero.
    pub fn age_seconds(&self) -> u64 {
        let now = Self::current_timestamp();
        now.saturating_sub(self.timestamp)
    }

    /// Whether the block is strictly older than `max_age_seconds`.
    pub fn is_old(&self, max_age_seconds: u64) -> bool {
        let age = self.age_seconds();
        age > max_age_seconds
    }

    /// Returns an owned copy of the compressed payload.
    pub fn clone_data(&self) -> Vec<u8> {
        self.compressed_data.to_vec()
    }

    /// Bundles the block's size figures into a `BlockSizeInfo`.
    pub fn size_info(&self) -> BlockSizeInfo {
        let compressed_size = self.compressed_data.len();
        let compression_ratio = self.compression_ratio();
        let space_savings = self.space_savings();
        BlockSizeInfo {
            original_size: self.original_size,
            compressed_size,
            compression_ratio,
            space_savings,
        }
    }
}
/// Textual names and compression hints for `BlockType`.
impl BlockType {
    /// Lowercase name of the variant.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::IndPtr => "indptr",
            Self::Indices => "indices",
            Self::Data => "data",
            Self::Combined => "combined",
            Self::Metadata => "metadata",
        }
    }

    /// Parses a variant from its name, ignoring letter case.
    ///
    /// Returns `None` for any other text. This is an inherent method returning
    /// `Option`, not the `std::str::FromStr` trait method.
    pub fn from_str(s: &str) -> Option<Self> {
        const VARIANTS: [BlockType; 5] = [
            BlockType::IndPtr,
            BlockType::Indices,
            BlockType::Data,
            BlockType::Combined,
            BlockType::Metadata,
        ];

        let wanted = s.to_lowercase();
        VARIANTS
            .iter()
            .copied()
            .find(|variant| variant.as_str() == wanted.as_str())
    }

    /// Relative priority for compressing this kind of block (larger = higher).
    pub fn compression_priority(&self) -> u8 {
        match *self {
            Self::Data => 10,
            Self::Indices => 8,
            Self::Combined => 7,
            Self::IndPtr => 5,
            Self::Metadata => 3,
        }
    }

    /// Whether this kind of block is flagged as worth compressing.
    pub fn benefits_from_compression(&self) -> bool {
        match *self {
            Self::Data | Self::Indices | Self::Combined => true,
            Self::IndPtr | Self::Metadata => false,
        }
    }
}
/// Construction and raw (de)serialisation of `BlockHeader`.
impl BlockHeader {
    /// Builds a header for a block.
    ///
    /// The block type is stored as its `as u8` discriminant, the checksum
    /// starts at `0`, and the timestamp is the current time in whole seconds
    /// since the Unix epoch (`0` if the clock is before the epoch).
    pub fn new(
        block_id: BlockId,
        block_type: BlockType,
        original_size: usize,
        compressed_size: usize,
        compression_level: u8,
    ) -> Self {
        let timestamp = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();
        Self {
            blockid: block_id,
            block_type: block_type as u8,
            original_size,
            compressed_size,
            compression_level,
            checksum: 0,
            timestamp,
        }
    }

    /// Encodes the header as the in-memory bytes of a `BlockHeaderSerialized`.
    ///
    /// The result is exactly `Self::size()` bytes long, in the host's
    /// `#[repr(C)]` layout and byte order. Every padding byte is zero, so the
    /// output is deterministic for a given header.
    #[allow(dead_code)]
    pub fn serialize(&self) -> Vec<u8> {
        // Start from fully zeroed storage. Viewing an ordinarily constructed
        // struct as bytes would read its compiler-inserted padding, which is
        // uninitialised memory.
        let mut raw = std::mem::MaybeUninit::<BlockHeaderSerialized>::zeroed();
        let dst = raw.as_mut_ptr();

        // SAFETY: `dst` points to properly aligned storage owned by `raw`,
        // which lives for the entire block. Each field is written through a raw
        // field pointer, which touches only that field's bytes and leaves the
        // zeroed padding initialised. Consequently all `size_of` bytes behind
        // `dst` are initialised when they are viewed as `u8` and copied out.
        unsafe {
            std::ptr::addr_of_mut!((*dst).blockid).write(self.blockid.to_u64());
            std::ptr::addr_of_mut!((*dst).block_type).write(self.block_type);
            std::ptr::addr_of_mut!((*dst).original_size).write(self.original_size as u64);
            std::ptr::addr_of_mut!((*dst).compressed_size).write(self.compressed_size as u64);
            std::ptr::addr_of_mut!((*dst).compression_level).write(self.compression_level);
            std::ptr::addr_of_mut!((*dst).checksum).write(self.checksum);
            std::ptr::addr_of_mut!((*dst).timestamp).write(self.timestamp);
            std::ptr::addr_of_mut!((*dst).padding).write([0; 3]);

            std::slice::from_raw_parts(
                dst as *const u8,
                std::mem::size_of::<BlockHeaderSerialized>(),
            )
            .to_vec()
        }
    }

    /// Decodes a header from bytes produced by `serialize`.
    ///
    /// Only the first `Self::size()` bytes are read; `data` may start at any
    /// address (no alignment is required).
    ///
    /// # Errors
    /// Returns `"Invalid header size"` when `data` is shorter than
    /// `Self::size()`, and a descriptive message when a stored size does not
    /// fit in `usize` on the current target.
    #[allow(dead_code)]
    pub fn deserialize(data: &[u8]) -> Result<Self, String> {
        if data.len() < std::mem::size_of::<BlockHeaderSerialized>() {
            return Err("Invalid header size".to_string());
        }

        // SAFETY: the length check above guarantees `size_of` readable bytes.
        // `read_unaligned` is required because a byte slice carries no
        // alignment guarantee. Every field of `BlockHeaderSerialized` is an
        // integer or an array of integers, so any bit pattern is a valid value.
        let serialized: BlockHeaderSerialized =
            unsafe { std::ptr::read_unaligned(data.as_ptr() as *const BlockHeaderSerialized) };

        // Reject values that would be silently truncated on targets where
        // `usize` is narrower than 64 bits.
        let to_usize = |value: u64, field: &str| -> Result<usize, String> {
            <usize as std::convert::TryFrom<u64>>::try_from(value)
                .map_err(|_| format!("Header field {} ({}) does not fit in usize", field, value))
        };
        let original_size = to_usize(serialized.original_size, "original_size")?;
        let compressed_size = to_usize(serialized.compressed_size, "compressed_size")?;

        Ok(BlockHeader {
            blockid: BlockId::from_u64(serialized.blockid),
            block_type: serialized.block_type,
            original_size,
            compressed_size,
            compression_level: serialized.compression_level,
            checksum: serialized.checksum,
            timestamp: serialized.timestamp,
        })
    }

    /// Length in bytes of the serialised header.
    pub fn size() -> usize {
        std::mem::size_of::<BlockHeaderSerialized>()
    }
}
/// Payload-free snapshot of a `CompressedMatrix`, as produced by
/// `CompressedMatrix::export_metadata`.
#[derive(Debug, Clone)]
pub struct MatrixMetadataExport {
/// Identifier of the exported matrix.
pub matrix_id: u64,
/// Row count recorded for the matrix before compression.
pub original_rows: usize,
/// Column count recorded for the matrix before compression.
pub original_cols: usize,
/// Number of blocks stored at export time.
pub block_count: usize,
/// Algorithm tag recorded for the matrix.
pub compression_algorithm: CompressionAlgorithm,
/// Block size recorded for the matrix.
pub block_size: usize,
/// Sum of the blocks' recorded pre-compression sizes.
pub total_original_size: usize,
/// Sum of the blocks' compressed payload lengths.
pub total_compressed_size: usize,
/// Ratio taken from the matrix's `metadata` at export time.
pub compression_ratio: f64,
/// `(id, type)` of every block, in storage order.
pub block_map: Vec<(BlockId, BlockType)>,
}
/// Size figures for a single block, as returned by `CompressedBlock::size_info`.
#[derive(Debug, Clone)]
pub struct BlockSizeInfo {
/// Recorded size of the payload before compression.
pub original_size: usize,
/// Length of the compressed payload in bytes.
pub compressed_size: usize,
/// Compressed size divided by original size (`1.0` when the original size
/// is zero).
pub compression_ratio: f64,
/// Original size minus compressed size, clamped at zero.
pub space_savings: usize,
}
/// Formats a `BlockType` as its lowercase name (the same text as `as_str`).
impl std::fmt::Display for BlockType {
    /// Writes the variant name, honouring width, fill, alignment and precision
    /// flags supplied by the caller (e.g. `{:>10}`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `write!(f, "{}", ..)` would format through a fresh formatter and so
        // silently drop the caller's flags; `pad` applies them.
        f.pad(self.as_str())
    }
}