1use super::cache::BlockId;
7use super::config::CompressionAlgorithm;
8use super::stats::CompressionMetadata;
9use std::marker::PhantomData;
10
11#[derive(Debug)]
13pub struct CompressedMatrix<T> {
14 pub matrixid: u64,
15 pub original_rows: usize,
16 pub original_cols: usize,
17 pub compressed_blocks: Vec<CompressedBlock>,
18 pub compression_algorithm: CompressionAlgorithm,
19 pub block_size: usize,
20 pub metadata: CompressionMetadata,
21 _phantom: PhantomData<T>,
22}
23
24#[derive(Debug, Clone)]
26pub struct CompressedBlock {
27 pub blockid: BlockId,
28 pub block_type: BlockType,
29 pub compressed_data: Vec<u8>,
30 pub original_size: usize,
31 pub compression_level: u8,
32 pub checksum: Option<u64>,
33 pub timestamp: u64,
34}
35
/// Category of data stored in a compressed block.
///
/// The numeric discriminants are spelled out because `BlockHeader::new`
/// stores the variant as `u8`; they are the same values the compiler assigned
/// implicitly. The variant names resemble the arrays of a compressed sparse
/// layout — presumably that is their meaning; confirm against the callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BlockType {
    /// Index-pointer payload.
    IndPtr = 0,
    /// Index payload.
    Indices = 1,
    /// Value payload; this is the `Default` variant.
    #[default]
    Data = 2,
    /// Payload combining several of the other kinds.
    Combined = 3,
    /// Descriptive payload.
    Metadata = 4,
}
46
47#[derive(Debug)]
49#[allow(dead_code)]
50pub(crate) struct BlockHeader {
51 pub blockid: BlockId,
52 pub block_type: u8,
53 pub original_size: usize,
54 pub compressed_size: usize,
55 pub compression_level: u8,
56 pub checksum: u64,
57 pub timestamp: u64,
58}
59
/// Fixed-width, `#[repr(C)]` image of a `BlockHeader`.
///
/// Field order is part of the byte layout and must not change. Under
/// `repr(C)` the compiler inserts alignment padding after each `u8` field
/// that is followed by a `u64`, and after the trailing `padding` array, so
/// the struct is larger than the sum of its fields.
#[repr(C)]
// NOTE(review): dead-code warnings are silenced, presumably because the
// serialised form is not yet used by the rest of the crate — confirm.
#[allow(dead_code)]
pub(crate) struct BlockHeaderSerialized {
    /// Block identity reduced to a `u64`.
    pub blockid: u64,
    /// Block kind as a raw byte.
    pub block_type: u8,
    /// Payload size in bytes before compression.
    pub original_size: u64,
    /// Payload size in bytes after compression.
    pub compressed_size: u64,
    /// Compression level recorded for the block.
    pub compression_level: u8,
    /// Checksum value.
    pub checksum: u64,
    /// Creation time in whole seconds since the Unix epoch.
    pub timestamp: u64,
    /// Explicit filler bytes at the end of the record; written as zeroes.
    pub padding: [u8; 3],
}
73
74impl<T> CompressedMatrix<T> {
75 pub fn new(
77 matrix_id: u64,
78 original_rows: usize,
79 original_cols: usize,
80 compression_algorithm: CompressionAlgorithm,
81 block_size: usize,
82 ) -> Self {
83 Self {
84 matrixid: matrix_id,
85 original_rows,
86 original_cols,
87 compressed_blocks: Vec::new(),
88 compression_algorithm,
89 block_size,
90 metadata: CompressionMetadata::new(0, 0, 0.0),
91 _phantom: PhantomData,
92 }
93 }
94
95 pub fn add_block(&mut self, block: CompressedBlock) {
97 self.compressed_blocks.push(block);
98 self.update_metadata();
99 }
100
101 pub fn get_block(&self, block_id: &BlockId) -> Option<&CompressedBlock> {
103 self.compressed_blocks
104 .iter()
105 .find(|block| &block.blockid == block_id)
106 }
107
108 pub fn get_block_mut(&mut self, block_id: &BlockId) -> Option<&mut CompressedBlock> {
110 self.compressed_blocks
111 .iter_mut()
112 .find(|block| &block.blockid == block_id)
113 }
114
115 pub fn remove_block(&mut self, block_id: &BlockId) -> Option<CompressedBlock> {
117 if let Some(pos) = self
118 .compressed_blocks
119 .iter()
120 .position(|block| &block.blockid == block_id)
121 {
122 let removed = self.compressed_blocks.remove(pos);
123 self.update_metadata();
124 Some(removed)
125 } else {
126 None
127 }
128 }
129
130 pub fn get_blocks_by_type(&self, block_type: BlockType) -> Vec<&CompressedBlock> {
132 self.compressed_blocks
133 .iter()
134 .filter(|block| block.block_type == block_type)
135 .collect()
136 }
137
138 fn update_metadata(&mut self) {
140 let total_original_size: usize = self
141 .compressed_blocks
142 .iter()
143 .map(|block| block.original_size)
144 .sum();
145
146 let total_compressed_size: usize = self
147 .compressed_blocks
148 .iter()
149 .map(|block| block.compressed_data.len())
150 .sum();
151
152 self.metadata = CompressionMetadata::new(
153 total_original_size,
154 total_compressed_size,
155 0.0, );
157 }
158
159 pub fn block_count(&self) -> usize {
161 self.compressed_blocks.len()
162 }
163
164 pub fn compressed_size(&self) -> usize {
166 self.compressed_blocks
167 .iter()
168 .map(|block| block.compressed_data.len())
169 .sum()
170 }
171
172 pub fn original_size(&self) -> usize {
174 self.compressed_blocks
175 .iter()
176 .map(|block| block.original_size)
177 .sum()
178 }
179
180 pub fn compression_ratio(&self) -> f64 {
182 self.metadata.compression_ratio
183 }
184
185 pub fn verify_integrity(&self) -> Result<(), String> {
187 for block in &self.compressed_blocks {
188 if let Some(expected_checksum) = block.checksum {
189 let actual_checksum = Self::calculate_checksum(&block.compressed_data);
190 if actual_checksum != expected_checksum {
191 return Err(format!("Checksum mismatch for block {}", block.blockid));
192 }
193 }
194 }
195 Ok(())
196 }
197
198 fn calculate_checksum(data: &[u8]) -> u64 {
200 use std::collections::hash_map::DefaultHasher;
201 use std::hash::{Hash, Hasher};
202
203 let mut hasher = DefaultHasher::new();
204 data.hash(&mut hasher);
205 hasher.finish()
206 }
207
208 pub fn memory_footprint(&self) -> usize {
210 std::mem::size_of::<Self>()
211 + self
212 .compressed_blocks
213 .iter()
214 .map(|block| block.memory_footprint())
215 .sum::<usize>()
216 }
217
218 pub fn optimize_blocks(&mut self) {
220 self.compressed_blocks.sort_by(|a, b| {
222 a.blockid
223 .block_row
224 .cmp(&b.blockid.block_row)
225 .then_with(|| a.blockid.block_col.cmp(&b.blockid.block_col))
226 });
227 }
228
229 pub fn get_blocks_row_major(&self) -> Vec<&CompressedBlock> {
231 let mut blocks = self.compressed_blocks.iter().collect::<Vec<_>>();
232 blocks.sort_by(|a, b| {
233 a.blockid
234 .block_row
235 .cmp(&b.blockid.block_row)
236 .then_with(|| a.blockid.block_col.cmp(&b.blockid.block_col))
237 });
238 blocks
239 }
240
241 pub fn export_metadata(&self) -> MatrixMetadataExport {
243 MatrixMetadataExport {
244 matrix_id: self.matrixid,
245 original_rows: self.original_rows,
246 original_cols: self.original_cols,
247 block_count: self.compressed_blocks.len(),
248 compression_algorithm: self.compression_algorithm,
249 block_size: self.block_size,
250 total_original_size: self.original_size(),
251 total_compressed_size: self.compressed_size(),
252 compression_ratio: self.compression_ratio(),
253 block_map: self
254 .compressed_blocks
255 .iter()
256 .map(|block| (block.blockid.clone(), block.block_type))
257 .collect(),
258 }
259 }
260}
261
262impl CompressedBlock {
263 pub fn new(
265 block_id: BlockId,
266 block_type: BlockType,
267 compressed_data: Vec<u8>,
268 original_size: usize,
269 compression_level: u8,
270 ) -> Self {
271 let checksum = Self::calculate_checksum(&compressed_data);
272
273 Self {
274 blockid: block_id,
275 block_type,
276 compressed_data,
277 original_size,
278 compression_level,
279 checksum: Some(checksum),
280 timestamp: Self::current_timestamp(),
281 }
282 }
283
284 pub fn new_unchecked(
286 block_id: BlockId,
287 block_type: BlockType,
288 compressed_data: Vec<u8>,
289 original_size: usize,
290 compression_level: u8,
291 ) -> Self {
292 Self {
293 blockid: block_id,
294 block_type,
295 compressed_data,
296 original_size,
297 compression_level,
298 checksum: None,
299 timestamp: Self::current_timestamp(),
300 }
301 }
302
303 pub fn compression_ratio(&self) -> f64 {
305 if self.original_size > 0 {
306 self.compressed_data.len() as f64 / self.original_size as f64
307 } else {
308 1.0
309 }
310 }
311
312 pub fn space_savings(&self) -> usize {
314 self.original_size
315 .saturating_sub(self.compressed_data.len())
316 }
317
318 pub fn verify_integrity(&self) -> bool {
320 if let Some(expected_checksum) = self.checksum {
321 let actual_checksum = Self::calculate_checksum(&self.compressed_data);
322 actual_checksum == expected_checksum
323 } else {
324 true }
326 }
327
328 pub fn update_checksum(&mut self) {
330 self.checksum = Some(Self::calculate_checksum(&self.compressed_data));
331 }
332
333 fn calculate_checksum(data: &[u8]) -> u64 {
335 use std::collections::hash_map::DefaultHasher;
336 use std::hash::{Hash, Hasher};
337
338 let mut hasher = DefaultHasher::new();
339 data.hash(&mut hasher);
340 hasher.finish()
341 }
342
343 fn current_timestamp() -> u64 {
345 std::time::SystemTime::now()
346 .duration_since(std::time::UNIX_EPOCH)
347 .unwrap_or_default()
348 .as_secs()
349 }
350
351 pub fn memory_footprint(&self) -> usize {
353 std::mem::size_of::<Self>() + self.compressed_data.len()
354 }
355
356 pub fn age_seconds(&self) -> u64 {
358 Self::current_timestamp().saturating_sub(self.timestamp)
359 }
360
361 pub fn is_old(&self, max_age_seconds: u64) -> bool {
363 self.age_seconds() > max_age_seconds
364 }
365
366 pub fn clone_data(&self) -> Vec<u8> {
368 self.compressed_data.clone()
369 }
370
371 pub fn size_info(&self) -> BlockSizeInfo {
373 BlockSizeInfo {
374 original_size: self.original_size,
375 compressed_size: self.compressed_data.len(),
376 compression_ratio: self.compression_ratio(),
377 space_savings: self.space_savings(),
378 }
379 }
380}
381
382impl BlockType {
383 pub fn as_str(&self) -> &'static str {
385 match self {
386 BlockType::IndPtr => "indptr",
387 BlockType::Indices => "indices",
388 BlockType::Data => "data",
389 BlockType::Combined => "combined",
390 BlockType::Metadata => "metadata",
391 }
392 }
393
394 pub fn from_str(s: &str) -> Option<Self> {
396 match s.to_lowercase().as_str() {
397 "indptr" => Some(BlockType::IndPtr),
398 "indices" => Some(BlockType::Indices),
399 "data" => Some(BlockType::Data),
400 "combined" => Some(BlockType::Combined),
401 "metadata" => Some(BlockType::Metadata),
402 _ => None,
403 }
404 }
405
406 pub fn compression_priority(&self) -> u8 {
408 match self {
409 BlockType::Data => 10, BlockType::Indices => 8, BlockType::Combined => 7, BlockType::IndPtr => 5, BlockType::Metadata => 3, }
415 }
416
417 pub fn benefits_from_compression(&self) -> bool {
419 match self {
420 BlockType::Data => true, BlockType::Indices => true, BlockType::Combined => true, BlockType::IndPtr => false, BlockType::Metadata => false, }
426 }
427}
428
429impl BlockHeader {
430 pub fn new(
432 block_id: BlockId,
433 block_type: BlockType,
434 original_size: usize,
435 compressed_size: usize,
436 compression_level: u8,
437 ) -> Self {
438 Self {
439 blockid: block_id,
440 block_type: block_type as u8,
441 original_size,
442 compressed_size,
443 compression_level,
444 checksum: 0, timestamp: std::time::SystemTime::now()
446 .duration_since(std::time::UNIX_EPOCH)
447 .unwrap_or_default()
448 .as_secs(),
449 }
450 }
451
452 #[allow(dead_code)]
454 pub fn serialize(&self) -> Vec<u8> {
455 let serialized = BlockHeaderSerialized {
456 blockid: self.blockid.to_u64(),
457 block_type: self.block_type,
458 original_size: self.original_size as u64,
459 compressed_size: self.compressed_size as u64,
460 compression_level: self.compression_level,
461 checksum: self.checksum,
462 timestamp: self.timestamp,
463 padding: [0; 3],
464 };
465
466 unsafe {
468 let ptr = &serialized as *const BlockHeaderSerialized as *const u8;
469 std::slice::from_raw_parts(ptr, std::mem::size_of::<BlockHeaderSerialized>()).to_vec()
470 }
471 }
472
473 #[allow(dead_code)]
475 pub fn deserialize(data: &[u8]) -> Result<Self, String> {
476 if data.len() < std::mem::size_of::<BlockHeaderSerialized>() {
477 return Err("Invalid header size".to_string());
478 }
479
480 let serialized: BlockHeaderSerialized = unsafe {
482 let ptr = data.as_ptr() as *const BlockHeaderSerialized;
483 ptr.read()
484 };
485
486 Ok(BlockHeader {
487 blockid: BlockId::from_u64(serialized.blockid),
488 block_type: serialized.block_type,
489 original_size: serialized.original_size as usize,
490 compressed_size: serialized.compressed_size as usize,
491 compression_level: serialized.compression_level,
492 checksum: serialized.checksum,
493 timestamp: serialized.timestamp,
494 })
495 }
496
497 pub fn size() -> usize {
499 std::mem::size_of::<BlockHeaderSerialized>()
500 }
501}
502
503#[derive(Debug, Clone)]
505pub struct MatrixMetadataExport {
506 pub matrix_id: u64,
507 pub original_rows: usize,
508 pub original_cols: usize,
509 pub block_count: usize,
510 pub compression_algorithm: CompressionAlgorithm,
511 pub block_size: usize,
512 pub total_original_size: usize,
513 pub total_compressed_size: usize,
514 pub compression_ratio: f64,
515 pub block_map: Vec<(BlockId, BlockType)>,
516}
517
/// Size figures for a single block, as produced by
/// `CompressedBlock::size_info`.
#[derive(Debug, Clone)]
pub struct BlockSizeInfo {
    /// Recorded size of the payload before compression.
    pub original_size: usize,
    /// Length of the compressed payload in bytes.
    pub compressed_size: usize,
    /// Compressed size divided by original size.
    pub compression_ratio: f64,
    /// Bytes saved by compression (zero when nothing was saved).
    pub space_savings: usize,
}
526
527impl std::fmt::Display for BlockType {
528 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
529 write!(f, "{}", self.as_str())
530 }
531}