1use super::cache::BlockId;
7use super::compressed_data::{BlockType, CompressedBlock};
8use super::config::CompressionAlgorithm;
9use super::stats::SparsityPatternAnalysis;
10use crate::error::{SparseError, SparseResult};
11use scirs2_core::numeric::{Float, NumAssign};
12use std::collections::HashMap;
13
/// Engine that compresses and decompresses memory blocks with a set of
/// byte-oriented algorithms, keeping per-algorithm usage metrics.
#[derive(Debug)]
pub struct CompressionEngine {
    /// Strategy currently in effect; initialized to the default (adaptive)
    /// strategy. NOTE(review): not read within this module — presumably
    /// consumed by strategy-selection code elsewhere; confirm.
    current_strategy: CompressionStrategy,
    /// Accumulated timing/size statistics keyed by algorithm.
    algorithm_metrics: HashMap<CompressionAlgorithm, AlgorithmMetrics>,
    /// Huffman tables keyed by name. NOTE(review): neither written nor read
    /// in this module — appears reserved for future caching of tables.
    huffman_tables: HashMap<String, HuffmanTable>,
}
24
/// Description of how a block should be compressed, together with
/// predicted and observed performance figures.
#[derive(Debug, Clone)]
pub(crate) struct CompressionStrategy {
    /// Algorithm this strategy applies.
    pub algorithm: CompressionAlgorithm,
    /// Target block size in bytes.
    pub block_size: usize,
    /// Presumably enables multi-level compression; always `false` as
    /// constructed here and not read in this module — confirm semantics.
    pub hierarchical: bool,
    /// Ratio predicted for the algorithm (compressed/original; smaller is better).
    pub predicted_ratio: f64,
    /// Ratio actually measured; stays 1.0 until `update_performance` is called.
    pub actual_ratio: f64,
    /// Compression throughput estimate (units unspecified here;
    /// `efficiency_score` treats 10.0 as the saturation point).
    pub compression_speed: f64,
    /// Decompression throughput estimate (same units as `compression_speed`).
    pub decompression_speed: f64,
}
36
/// Running totals for one compression algorithm, updated on every
/// compress/decompress call.
#[derive(Debug, Clone)]
pub struct AlgorithmMetrics {
    /// Number of compress and decompress operations recorded.
    pub total_operations: usize,
    /// Total seconds spent compressing.
    pub total_compression_time: f64,
    /// Total seconds spent decompressing.
    pub total_decompression_time: f64,
    /// Sum of uncompressed sizes seen, in bytes.
    pub total_original_size: usize,
    /// Sum of compressed sizes seen, in bytes.
    pub total_compressed_size: usize,
    /// Number of operations recorded as successful.
    pub success_count: usize,
}
47
/// Outcome of a single compression call.
#[derive(Debug)]
pub struct CompressionResult {
    /// The compressed bytes.
    pub compressed_data: Vec<u8>,
    /// Size of the uncompressed input, in bytes.
    pub original_size: usize,
    /// compressed / original size; smaller is better, 1.0 for empty input.
    pub compression_ratio: f64,
    /// Wall-clock seconds spent compressing.
    pub compression_time: f64,
    /// Algorithm that produced `compressed_data`.
    pub algorithm_used: CompressionAlgorithm,
}
57
/// Huffman code table: the bit codes used for encoding plus the tree
/// used to decode them.
#[derive(Debug, Clone)]
struct HuffmanTable {
    /// Byte value → its code as a most-significant-bit-first bit sequence.
    encode_table: HashMap<u8, Vec<bool>>,
    /// Root of the decoding tree (a `false` bit descends left, `true` right).
    decode_tree: HuffmanNode,
}
64
/// Node of a Huffman coding tree.
#[derive(Debug, Clone, PartialEq, Eq)]
enum HuffmanNode {
    /// Terminal node carrying a decoded byte value.
    Leaf {
        value: u8,
    },
    /// Inner node: a `false` bit selects `left`, a `true` bit selects `right`.
    Internal {
        left: Box<HuffmanNode>,
        right: Box<HuffmanNode>,
    },
}
76
/// Heap entry used while building a Huffman tree: a (partial) subtree
/// plus the total frequency of the symbols it covers.
#[derive(Debug, Clone, Eq, PartialEq)]
struct FrequencyNode {
    /// Combined occurrence count of all leaves under `node`.
    frequency: usize,
    /// The subtree being merged.
    node: HuffmanNode,
}
83
84impl std::cmp::Ord for FrequencyNode {
85 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
86 other.frequency.cmp(&self.frequency) }
88}
89
90impl std::cmp::PartialOrd for FrequencyNode {
91 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
92 Some(self.cmp(other))
93 }
94}
95
96impl CompressionEngine {
97 pub fn new() -> Self {
99 Self {
100 current_strategy: CompressionStrategy::default(),
101 algorithm_metrics: HashMap::new(),
102 huffman_tables: HashMap::new(),
103 }
104 }
105
106 pub fn compress(
108 &mut self,
109 data: &[u8],
110 algorithm: CompressionAlgorithm,
111 block_id: &BlockId,
112 block_type: BlockType,
113 ) -> SparseResult<CompressionResult> {
114 let start_time = std::time::Instant::now();
115 let original_size = data.len();
116
117 let compressed_data = match algorithm {
118 CompressionAlgorithm::None => data.to_vec(),
119 CompressionAlgorithm::RLE => self.compress_rle(data)?,
120 CompressionAlgorithm::Delta => self.compress_delta(data)?,
121 CompressionAlgorithm::Huffman => self.compress_huffman(data)?,
122 CompressionAlgorithm::LZ77 => self.compress_lz77(data)?,
123 CompressionAlgorithm::SparseOptimized => {
124 self.compress_sparse_optimized(data, block_type)?
125 }
126 CompressionAlgorithm::Adaptive => self.compress_adaptive(data, block_type)?,
127 };
128
129 let compression_time = start_time.elapsed().as_secs_f64();
130 let compression_ratio = if original_size > 0 {
131 compressed_data.len() as f64 / original_size as f64
132 } else {
133 1.0
134 };
135
136 self.update_algorithm_metrics(
138 algorithm,
139 compression_time,
140 original_size,
141 compressed_data.len(),
142 true,
143 );
144
145 Ok(CompressionResult {
146 compressed_data,
147 original_size,
148 compression_ratio,
149 compression_time,
150 algorithm_used: algorithm,
151 })
152 }
153
154 pub fn decompress(
156 &mut self,
157 compressed_data: &[u8],
158 algorithm: CompressionAlgorithm,
159 original_size: usize,
160 ) -> SparseResult<Vec<u8>> {
161 let start_time = std::time::Instant::now();
162
163 let decompressed_data = match algorithm {
164 CompressionAlgorithm::None => compressed_data.to_vec(),
165 CompressionAlgorithm::RLE => self.decompress_rle(compressed_data)?,
166 CompressionAlgorithm::Delta => self.decompress_delta(compressed_data)?,
167 CompressionAlgorithm::Huffman => self.decompress_huffman(compressed_data)?,
168 CompressionAlgorithm::LZ77 => self.decompress_lz77(compressed_data)?,
169 CompressionAlgorithm::SparseOptimized => {
170 self.decompress_sparse_optimized(compressed_data)?
171 }
172 CompressionAlgorithm::Adaptive => self.decompress_adaptive(compressed_data)?,
173 };
174
175 let decompression_time = start_time.elapsed().as_secs_f64();
176
177 self.update_algorithm_metrics(
179 algorithm,
180 decompression_time,
181 original_size,
182 compressed_data.len(),
183 true,
184 );
185
186 if decompressed_data.len() != original_size {
187 return Err(SparseError::ComputationError(format!(
188 "Decompression size mismatch: expected {}, got {}",
189 original_size,
190 decompressed_data.len()
191 )));
192 }
193
194 Ok(decompressed_data)
195 }
196
197 fn compress_rle(&self, data: &[u8]) -> SparseResult<Vec<u8>> {
199 if data.is_empty() {
200 return Ok(Vec::new());
201 }
202
203 let mut compressed = Vec::new();
204 let mut current_byte = data[0];
205 let mut count = 1u8;
206
207 for &byte in &data[1..] {
208 if byte == current_byte && count < 255 {
209 count += 1;
210 } else {
211 compressed.push(count);
212 compressed.push(current_byte);
213 current_byte = byte;
214 count = 1;
215 }
216 }
217
218 compressed.push(count);
220 compressed.push(current_byte);
221
222 Ok(compressed)
223 }
224
225 fn decompress_rle(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
227 if !compressed_data.len().is_multiple_of(2) {
228 return Err(SparseError::ComputationError(
229 "Invalid RLE data".to_string(),
230 ));
231 }
232
233 let mut decompressed = Vec::new();
234
235 for chunk in compressed_data.chunks(2) {
236 let count = chunk[0] as usize;
237 let byte = chunk[1];
238 decompressed.extend(std::iter::repeat_n(byte, count));
239 }
240
241 Ok(decompressed)
242 }
243
244 fn compress_delta(&self, data: &[u8]) -> SparseResult<Vec<u8>> {
246 if data.len() < 4 {
247 return Ok(data.to_vec()); }
249
250 let integers: Vec<u32> = data
252 .chunks(4)
253 .map(|chunk| {
254 if chunk.len() == 4 {
255 u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])
256 } else {
257 0
258 }
259 })
260 .collect();
261
262 if integers.is_empty() {
263 return Ok(Vec::new());
264 }
265
266 let mut compressed = Vec::new();
267
268 compressed.extend(&integers[0].to_le_bytes());
270
271 for i in 1..integers.len() {
273 let delta = integers[i].wrapping_sub(integers[i - 1]);
274
275 if delta < 128 {
277 compressed.push(delta as u8);
278 } else if delta < 32768 {
279 compressed.push(0x80 | (delta as u8));
280 compressed.push((delta >> 7) as u8);
281 } else {
282 compressed.push(0xFF);
283 compressed.extend(&delta.to_le_bytes());
284 }
285 }
286
287 Ok(compressed)
288 }
289
290 fn decompress_delta(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
292 if compressed_data.len() < 4 {
293 return Ok(compressed_data.to_vec());
294 }
295
296 let mut decompressed = Vec::new();
297 let mut pos = 0;
298
299 if compressed_data.len() < 4 {
301 return Err(SparseError::ComputationError(
302 "Invalid delta data".to_string(),
303 ));
304 }
305
306 let first_value = u32::from_le_bytes([
307 compressed_data[0],
308 compressed_data[1],
309 compressed_data[2],
310 compressed_data[3],
311 ]);
312 decompressed.extend(&first_value.to_le_bytes());
313 pos += 4;
314
315 let mut current_value = first_value;
316
317 while pos < compressed_data.len() {
319 let delta = if compressed_data[pos] < 0x80 {
320 let d = compressed_data[pos] as u32;
321 pos += 1;
322 d
323 } else if compressed_data[pos] < 0xFF {
324 if pos + 1 >= compressed_data.len() {
325 break;
326 }
327 let d = ((compressed_data[pos] & 0x7F) as u32)
328 | ((compressed_data[pos + 1] as u32) << 7);
329 pos += 2;
330 d
331 } else {
332 if pos + 4 >= compressed_data.len() {
333 break;
334 }
335 let d = u32::from_le_bytes([
336 compressed_data[pos + 1],
337 compressed_data[pos + 2],
338 compressed_data[pos + 3],
339 compressed_data[pos + 4],
340 ]);
341 pos += 5;
342 d
343 };
344
345 current_value = current_value.wrapping_add(delta);
346 decompressed.extend(¤t_value.to_le_bytes());
347 }
348
349 Ok(decompressed)
350 }
351
352 fn compress_huffman(&mut self, data: &[u8]) -> SparseResult<Vec<u8>> {
354 if data.is_empty() {
355 return Ok(Vec::new());
356 }
357
358 let table = self.build_huffman_table(data);
359 let mut bit_stream = Vec::new();
360 let mut current_byte = 0u8;
361 let mut bit_count = 0;
362
363 for &byte in data {
364 if let Some(code) = table.encode_table.get(&byte) {
365 for &bit in code {
366 if bit {
367 current_byte |= 1 << (7 - bit_count);
368 }
369 bit_count += 1;
370
371 if bit_count == 8 {
372 bit_stream.push(current_byte);
373 current_byte = 0;
374 bit_count = 0;
375 }
376 }
377 }
378 }
379
380 if bit_count > 0 {
382 bit_stream.push(current_byte);
383 }
384
385 let mut result = Vec::new();
387 let table_data = self.serialize_huffman_table(&table)?;
388 result.extend(&(table_data.len() as u32).to_le_bytes());
389 result.extend(table_data);
390 result.push(bit_count); result.extend(bit_stream);
392
393 Ok(result)
394 }
395
396 fn decompress_huffman(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
398 if compressed_data.len() < 5 {
399 return Ok(Vec::new());
400 }
401
402 let table_size = u32::from_le_bytes([
403 compressed_data[0],
404 compressed_data[1],
405 compressed_data[2],
406 compressed_data[3],
407 ]) as usize;
408
409 if compressed_data.len() < 4 + table_size + 1 {
410 return Err(SparseError::ComputationError(
411 "Invalid Huffman data".to_string(),
412 ));
413 }
414
415 let table_data = &compressed_data[4..4 + table_size];
416 let table = self.deserialize_huffman_table(table_data)?;
417
418 let remaining_bits = compressed_data[4 + table_size];
419 let bit_stream = &compressed_data[4 + table_size + 1..];
420
421 let mut decompressed = Vec::new();
422 let mut current_node = &table.decode_tree;
423
424 for (i, &byte) in bit_stream.iter().enumerate() {
425 let bit_limit = if i == bit_stream.len() - 1 && remaining_bits > 0 {
426 remaining_bits
427 } else {
428 8
429 };
430
431 for bit_pos in 0..bit_limit {
432 let bit = (byte >> (7 - bit_pos)) & 1 == 1;
433
434 current_node = match current_node {
435 HuffmanNode::Internal { left, right } => {
436 if bit {
437 right
438 } else {
439 left
440 }
441 }
442 HuffmanNode::Leaf { value } => {
443 decompressed.push(*value);
444 &table.decode_tree
445 }
446 };
447
448 if let HuffmanNode::Leaf { value } = current_node {
449 decompressed.push(*value);
450 current_node = &table.decode_tree;
451 }
452 }
453 }
454
455 Ok(decompressed)
456 }
457
458 fn compress_lz77(&self, data: &[u8]) -> SparseResult<Vec<u8>> {
460 const WINDOW_SIZE: usize = 4096;
461 const LOOKAHEAD_SIZE: usize = 256;
462
463 if data.is_empty() {
464 return Ok(Vec::new());
465 }
466
467 let mut compressed = Vec::new();
468 let mut pos = 0;
469
470 while pos < data.len() {
471 let mut best_length = 0;
472 let mut best_distance = 0;
473
474 let window_start = pos.saturating_sub(WINDOW_SIZE);
476 let lookahead_end = (pos + LOOKAHEAD_SIZE).min(data.len());
477
478 for window_pos in window_start..pos {
479 let mut length = 0;
480 while window_pos + length < pos
481 && pos + length < lookahead_end
482 && data[window_pos + length] == data[pos + length]
483 {
484 length += 1;
485 }
486
487 if length > best_length {
488 best_length = length;
489 best_distance = pos - window_pos;
490 }
491 }
492
493 if best_length >= 3 {
494 compressed.push(0xFF); compressed.extend(&(best_distance as u16).to_le_bytes());
497 compressed.push(best_length as u8);
498 pos += best_length;
499 } else {
500 compressed.push(data[pos]);
502 pos += 1;
503 }
504 }
505
506 Ok(compressed)
507 }
508
509 fn decompress_lz77(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
511 let mut decompressed = Vec::new();
512 let mut pos = 0;
513
514 while pos < compressed_data.len() {
515 if compressed_data[pos] == 0xFF && pos + 3 < compressed_data.len() {
516 let distance =
518 u16::from_le_bytes([compressed_data[pos + 1], compressed_data[pos + 2]])
519 as usize;
520 let length = compressed_data[pos + 3] as usize;
521
522 if distance == 0 || distance > decompressed.len() {
523 return Err(SparseError::ComputationError(
524 "Invalid LZ77 distance".to_string(),
525 ));
526 }
527
528 let start_pos = decompressed.len() - distance;
529 for i in 0..length {
530 let byte = decompressed[start_pos + (i % distance)];
531 decompressed.push(byte);
532 }
533
534 pos += 4;
535 } else {
536 decompressed.push(compressed_data[pos]);
538 pos += 1;
539 }
540 }
541
542 Ok(decompressed)
543 }
544
545 fn compress_sparse_optimized(
547 &mut self,
548 data: &[u8],
549 block_type: BlockType,
550 ) -> SparseResult<Vec<u8>> {
551 match block_type {
552 BlockType::Indices => {
553 self.compress_delta(data)
555 }
556 BlockType::IndPtr => {
557 self.compress_delta(data)
559 }
560 BlockType::Data => {
561 self.compress_rle(data)
563 }
564 _ => {
565 self.compress_adaptive(data, block_type)
567 }
568 }
569 }
570
571 fn decompress_sparse_optimized(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
573 if let Ok(result) = self.decompress_delta(compressed_data) {
577 Ok(result)
578 } else if let Ok(result) = self.decompress_rle(compressed_data) {
579 Ok(result)
580 } else {
581 Ok(compressed_data.to_vec())
582 }
583 }
584
585 fn compress_adaptive(&mut self, data: &[u8], block_type: BlockType) -> SparseResult<Vec<u8>> {
587 let algorithms = vec![
588 CompressionAlgorithm::RLE,
589 CompressionAlgorithm::Delta,
590 CompressionAlgorithm::LZ77,
591 ];
592
593 let mut best_result = data.to_vec();
594 let mut best_algorithm = CompressionAlgorithm::None;
595
596 for algorithm in algorithms {
597 if let Ok(compressed) = match algorithm {
598 CompressionAlgorithm::RLE => self.compress_rle(data),
599 CompressionAlgorithm::Delta => self.compress_delta(data),
600 CompressionAlgorithm::LZ77 => self.compress_lz77(data),
601 _ => continue,
602 } {
603 if compressed.len() < best_result.len() {
604 best_result = compressed;
605 best_algorithm = algorithm;
606 }
607 }
608 }
609
610 let mut result = vec![best_algorithm as u8];
612 result.extend(best_result);
613 Ok(result)
614 }
615
616 fn decompress_adaptive(&mut self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
618 if compressed_data.is_empty() {
619 return Ok(Vec::new());
620 }
621
622 let algorithm_id = compressed_data[0];
623 let data = &compressed_data[1..];
624
625 match algorithm_id {
626 0 => Ok(data.to_vec()), 1 => self.decompress_rle(data), 2 => self.decompress_delta(data), 3 => self.decompress_huffman(data), 4 => self.decompress_lz77(data), _ => Err(SparseError::ComputationError(
632 "Unknown compression algorithm".to_string(),
633 )),
634 }
635 }
636
637 fn build_huffman_table(&mut self, data: &[u8]) -> HuffmanTable {
639 let mut frequencies = HashMap::new();
641 for &byte in data {
642 *frequencies.entry(byte).or_insert(0) += 1;
643 }
644
645 if frequencies.len() <= 1 {
646 let byte = data[0];
648 let mut encode_table = HashMap::new();
649 encode_table.insert(byte, vec![false]);
650 return HuffmanTable {
651 encode_table,
652 decode_tree: HuffmanNode::Leaf { value: byte },
653 };
654 }
655
656 let mut heap = std::collections::BinaryHeap::new();
658 for (byte, freq) in frequencies {
659 heap.push(FrequencyNode {
660 frequency: freq,
661 node: HuffmanNode::Leaf { value: byte },
662 });
663 }
664
665 while heap.len() > 1 {
666 let right = heap.pop().unwrap();
667 let left = heap.pop().unwrap();
668
669 heap.push(FrequencyNode {
670 frequency: left.frequency + right.frequency,
671 node: HuffmanNode::Internal {
672 left: Box::new(left.node),
673 right: Box::new(right.node),
674 },
675 });
676 }
677
678 let root = heap.pop().unwrap().node;
679
680 let mut encode_table = HashMap::new();
682 self.build_codes(&root, Vec::new(), &mut encode_table);
683
684 HuffmanTable {
685 encode_table,
686 decode_tree: root,
687 }
688 }
689
690 fn build_codes(
692 &self,
693 node: &HuffmanNode,
694 code: Vec<bool>,
695 encode_table: &mut HashMap<u8, Vec<bool>>,
696 ) {
697 match node {
698 HuffmanNode::Leaf { value } => {
699 encode_table.insert(*value, code);
700 }
701 HuffmanNode::Internal { left, right } => {
702 let mut left_code = code.clone();
703 left_code.push(false);
704 self.build_codes(left, left_code, encode_table);
705
706 let mut right_code = code;
707 right_code.push(true);
708 self.build_codes(right, right_code, encode_table);
709 }
710 }
711 }
712
713 fn serialize_huffman_table(&self, _table: &HuffmanTable) -> SparseResult<Vec<u8>> {
715 Ok(vec![0])
717 }
718
719 fn deserialize_huffman_table(&self, _data: &[u8]) -> SparseResult<HuffmanTable> {
721 Err(SparseError::ComputationError(
723 "Huffman table deserialization not implemented".to_string(),
724 ))
725 }
726
727 fn update_algorithm_metrics(
729 &mut self,
730 algorithm: CompressionAlgorithm,
731 time: f64,
732 original_size: usize,
733 compressed_size: usize,
734 success: bool,
735 ) {
736 let metrics = self
737 .algorithm_metrics
738 .entry(algorithm)
739 .or_insert_with(|| AlgorithmMetrics {
740 total_operations: 0,
741 total_compression_time: 0.0,
742 total_decompression_time: 0.0,
743 total_original_size: 0,
744 total_compressed_size: 0,
745 success_count: 0,
746 });
747
748 metrics.total_operations += 1;
749 metrics.total_compression_time += time;
750 metrics.total_original_size += original_size;
751 metrics.total_compressed_size += compressed_size;
752
753 if success {
754 metrics.success_count += 1;
755 }
756 }
757
758 pub fn get_algorithm_metrics(
760 &self,
761 algorithm: CompressionAlgorithm,
762 ) -> Option<&AlgorithmMetrics> {
763 self.algorithm_metrics.get(&algorithm)
764 }
765
766 pub fn select_best_algorithm(
768 &self,
769 data_size: usize,
770 block_type: BlockType,
771 ) -> CompressionAlgorithm {
772 match block_type {
773 BlockType::Indices | BlockType::IndPtr if data_size > 1024 => {
774 CompressionAlgorithm::Delta
775 }
776 BlockType::Data if data_size > 4096 => CompressionAlgorithm::LZ77,
777 _ if data_size > 512 => CompressionAlgorithm::RLE,
778 _ => CompressionAlgorithm::None,
779 }
780 }
781}
782
783impl CompressionStrategy {
784 pub fn new(algorithm: CompressionAlgorithm, block_size: usize) -> Self {
786 Self {
787 algorithm,
788 block_size,
789 hierarchical: false,
790 predicted_ratio: algorithm.expected_compression_ratio(),
791 actual_ratio: 1.0,
792 compression_speed: algorithm.compression_speed(),
793 decompression_speed: algorithm.compression_speed() * 1.5, }
795 }
796
797 pub fn update_performance(
799 &mut self,
800 actual_ratio: f64,
801 compression_speed: f64,
802 decompression_speed: f64,
803 ) {
804 self.actual_ratio = actual_ratio;
805 self.compression_speed = compression_speed;
806 self.decompression_speed = decompression_speed;
807 }
808
809 pub fn efficiency_score(&self) -> f64 {
811 let ratio_score = (1.0 - self.actual_ratio).max(0.0);
812 let speed_score = (self.compression_speed / 10.0).min(1.0);
813 (ratio_score + speed_score) / 2.0
814 }
815}
816
817impl Default for CompressionStrategy {
818 fn default() -> Self {
819 Self::new(CompressionAlgorithm::Adaptive, 1024 * 1024)
820 }
821}
822
823impl Default for CompressionEngine {
824 fn default() -> Self {
825 Self::new()
826 }
827}