Skip to main content

scirs2_sparse/adaptive_memory_compression/
compression.rs

1//! Compression algorithms and engine for adaptive memory compression
2//!
3//! This module implements various compression algorithms optimized for sparse matrix data,
4//! including run-length encoding, delta compression, Huffman coding, and adaptive strategies.
5
6use super::cache::BlockId;
7use super::compressed_data::{BlockType, CompressedBlock};
8use super::config::CompressionAlgorithm;
9use super::stats::SparsityPatternAnalysis;
10use crate::error::{SparseError, SparseResult};
11use scirs2_core::numeric::{Float, NumAssign, SparseElement};
12use std::collections::HashMap;
13
14/// Compression engine that handles all compression algorithms
15#[derive(Debug)]
16pub struct CompressionEngine {
17    /// Current compression strategy
18    current_strategy: CompressionStrategy,
19    /// Algorithm performance metrics
20    algorithm_metrics: HashMap<CompressionAlgorithm, AlgorithmMetrics>,
21    /// Huffman tables cache
22    huffman_tables: HashMap<String, HuffmanTable>,
23}
24
25/// Compression strategy configuration
26#[derive(Debug, Clone)]
27pub(crate) struct CompressionStrategy {
28    pub algorithm: CompressionAlgorithm,
29    pub block_size: usize,
30    pub hierarchical: bool,
31    pub predicted_ratio: f64,
32    pub actual_ratio: f64,
33    pub compression_speed: f64,
34    pub decompression_speed: f64,
35}
36
37/// Algorithm performance metrics
38#[derive(Debug, Clone)]
39pub struct AlgorithmMetrics {
40    pub total_operations: usize,
41    pub total_compression_time: f64,
42    pub total_decompression_time: f64,
43    pub total_original_size: usize,
44    pub total_compressed_size: usize,
45    pub success_count: usize,
46}
47
48/// Compression result with metadata
49#[derive(Debug)]
50pub struct CompressionResult {
51    pub compressed_data: Vec<u8>,
52    pub original_size: usize,
53    pub compression_ratio: f64,
54    pub compression_time: f64,
55    pub algorithm_used: CompressionAlgorithm,
56}
57
58/// Huffman coding table
59#[derive(Debug, Clone)]
60struct HuffmanTable {
61    encode_table: HashMap<u8, Vec<bool>>,
62    decode_tree: HuffmanNode,
63}
64
65/// Huffman tree node
66#[derive(Debug, Clone, PartialEq, Eq)]
67enum HuffmanNode {
68    Leaf {
69        value: u8,
70    },
71    Internal {
72        left: Box<HuffmanNode>,
73        right: Box<HuffmanNode>,
74    },
75}
76
77/// Frequency counter for Huffman coding
78#[derive(Debug, Clone, Eq, PartialEq)]
79struct FrequencyNode {
80    frequency: usize,
81    node: HuffmanNode,
82}
83
84impl std::cmp::Ord for FrequencyNode {
85    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
86        other.frequency.cmp(&self.frequency) // Reverse for min-heap
87    }
88}
89
90impl std::cmp::PartialOrd for FrequencyNode {
91    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
92        Some(self.cmp(other))
93    }
94}
95
96impl CompressionEngine {
97    /// Create a new compression engine
98    pub fn new() -> Self {
99        Self {
100            current_strategy: CompressionStrategy::default(),
101            algorithm_metrics: HashMap::new(),
102            huffman_tables: HashMap::new(),
103        }
104    }
105
106    /// Compress data using the specified algorithm
107    pub fn compress(
108        &mut self,
109        data: &[u8],
110        algorithm: CompressionAlgorithm,
111        block_id: &BlockId,
112        block_type: BlockType,
113    ) -> SparseResult<CompressionResult> {
114        let start_time = std::time::Instant::now();
115        let original_size = data.len();
116
117        let compressed_data = match algorithm {
118            CompressionAlgorithm::None => data.to_vec(),
119            CompressionAlgorithm::RLE => self.compress_rle(data)?,
120            CompressionAlgorithm::Delta => self.compress_delta(data)?,
121            CompressionAlgorithm::Huffman => self.compress_huffman(data)?,
122            CompressionAlgorithm::LZ77 => self.compress_lz77(data)?,
123            CompressionAlgorithm::SparseOptimized => {
124                self.compress_sparse_optimized(data, block_type)?
125            }
126            CompressionAlgorithm::Adaptive => self.compress_adaptive(data, block_type)?,
127        };
128
129        let compression_time = start_time.elapsed().as_secs_f64();
130        let compression_ratio = if original_size > 0 {
131            compressed_data.len() as f64 / original_size as f64
132        } else {
133            1.0
134        };
135
136        // Update algorithm metrics
137        self.update_algorithm_metrics(
138            algorithm,
139            compression_time,
140            original_size,
141            compressed_data.len(),
142            true,
143        );
144
145        Ok(CompressionResult {
146            compressed_data,
147            original_size,
148            compression_ratio,
149            compression_time,
150            algorithm_used: algorithm,
151        })
152    }
153
154    /// Decompress data using the specified algorithm
155    pub fn decompress(
156        &mut self,
157        compressed_data: &[u8],
158        algorithm: CompressionAlgorithm,
159        original_size: usize,
160    ) -> SparseResult<Vec<u8>> {
161        let start_time = std::time::Instant::now();
162
163        let decompressed_data = match algorithm {
164            CompressionAlgorithm::None => compressed_data.to_vec(),
165            CompressionAlgorithm::RLE => self.decompress_rle(compressed_data)?,
166            CompressionAlgorithm::Delta => self.decompress_delta(compressed_data)?,
167            CompressionAlgorithm::Huffman => self.decompress_huffman(compressed_data)?,
168            CompressionAlgorithm::LZ77 => self.decompress_lz77(compressed_data)?,
169            CompressionAlgorithm::SparseOptimized => {
170                self.decompress_sparse_optimized(compressed_data)?
171            }
172            CompressionAlgorithm::Adaptive => self.decompress_adaptive(compressed_data)?,
173        };
174
175        let decompression_time = start_time.elapsed().as_secs_f64();
176
177        // Update algorithm metrics
178        self.update_algorithm_metrics(
179            algorithm,
180            decompression_time,
181            original_size,
182            compressed_data.len(),
183            true,
184        );
185
186        if decompressed_data.len() != original_size {
187            return Err(SparseError::ComputationError(format!(
188                "Decompression size mismatch: expected {}, got {}",
189                original_size,
190                decompressed_data.len()
191            )));
192        }
193
194        Ok(decompressed_data)
195    }
196
197    /// Run-Length Encoding compression
198    fn compress_rle(&self, data: &[u8]) -> SparseResult<Vec<u8>> {
199        if data.is_empty() {
200            return Ok(Vec::new());
201        }
202
203        let mut compressed = Vec::new();
204        let mut current_byte = data[0];
205        let mut count = 1u8;
206
207        for &byte in &data[1..] {
208            if byte == current_byte && count < 255 {
209                count += 1;
210            } else {
211                compressed.push(count);
212                compressed.push(current_byte);
213                current_byte = byte;
214                count = 1;
215            }
216        }
217
218        // Add the last run
219        compressed.push(count);
220        compressed.push(current_byte);
221
222        Ok(compressed)
223    }
224
225    /// Run-Length Encoding decompression
226    fn decompress_rle(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
227        if !compressed_data.len().is_multiple_of(2) {
228            return Err(SparseError::ComputationError(
229                "Invalid RLE data".to_string(),
230            ));
231        }
232
233        let mut decompressed = Vec::new();
234
235        for chunk in compressed_data.chunks(2) {
236            let count = chunk[0] as usize;
237            let byte = chunk[1];
238            decompressed.extend(std::iter::repeat_n(byte, count));
239        }
240
241        Ok(decompressed)
242    }
243
244    /// Delta encoding compression (for sorted integer sequences)
245    fn compress_delta(&self, data: &[u8]) -> SparseResult<Vec<u8>> {
246        if data.len() < 4 {
247            return Ok(data.to_vec()); // Too small for delta encoding
248        }
249
250        // Interpret as u32 integers
251        let integers: Vec<u32> = data
252            .chunks(4)
253            .map(|chunk| {
254                if chunk.len() == 4 {
255                    u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])
256                } else {
257                    0
258                }
259            })
260            .collect();
261
262        if integers.is_empty() {
263            return Ok(Vec::new());
264        }
265
266        let mut compressed = Vec::new();
267
268        // Store first value as-is
269        compressed.extend(&integers[0].to_le_bytes());
270
271        // Store deltas
272        for i in 1..integers.len() {
273            let delta = integers[i].wrapping_sub(integers[i - 1]);
274
275            // Use variable-length encoding for deltas
276            if delta < 128 {
277                compressed.push(delta as u8);
278            } else if delta < 32768 {
279                compressed.push(0x80 | (delta as u8));
280                compressed.push((delta >> 7) as u8);
281            } else {
282                compressed.push(0xFF);
283                compressed.extend(&delta.to_le_bytes());
284            }
285        }
286
287        Ok(compressed)
288    }
289
290    /// Delta encoding decompression
291    fn decompress_delta(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
292        if compressed_data.len() < 4 {
293            return Ok(compressed_data.to_vec());
294        }
295
296        let mut decompressed = Vec::new();
297        let mut pos = 0;
298
299        // Read first value
300        if compressed_data.len() < 4 {
301            return Err(SparseError::ComputationError(
302                "Invalid delta data".to_string(),
303            ));
304        }
305
306        let first_value = u32::from_le_bytes([
307            compressed_data[0],
308            compressed_data[1],
309            compressed_data[2],
310            compressed_data[3],
311        ]);
312        decompressed.extend(&first_value.to_le_bytes());
313        pos += 4;
314
315        let mut current_value = first_value;
316
317        // Read deltas
318        while pos < compressed_data.len() {
319            let delta = if compressed_data[pos] < 0x80 {
320                let d = compressed_data[pos] as u32;
321                pos += 1;
322                d
323            } else if compressed_data[pos] < 0xFF {
324                if pos + 1 >= compressed_data.len() {
325                    break;
326                }
327                let d = ((compressed_data[pos] & 0x7F) as u32)
328                    | ((compressed_data[pos + 1] as u32) << 7);
329                pos += 2;
330                d
331            } else {
332                if pos + 4 >= compressed_data.len() {
333                    break;
334                }
335                let d = u32::from_le_bytes([
336                    compressed_data[pos + 1],
337                    compressed_data[pos + 2],
338                    compressed_data[pos + 3],
339                    compressed_data[pos + 4],
340                ]);
341                pos += 5;
342                d
343            };
344
345            current_value = current_value.wrapping_add(delta);
346            decompressed.extend(&current_value.to_le_bytes());
347        }
348
349        Ok(decompressed)
350    }
351
352    /// Huffman encoding compression
353    fn compress_huffman(&mut self, data: &[u8]) -> SparseResult<Vec<u8>> {
354        if data.is_empty() {
355            return Ok(Vec::new());
356        }
357
358        let table = self.build_huffman_table(data);
359        let mut bit_stream = Vec::new();
360        let mut current_byte = 0u8;
361        let mut bit_count = 0;
362
363        for &byte in data {
364            if let Some(code) = table.encode_table.get(&byte) {
365                for &bit in code {
366                    if bit {
367                        current_byte |= 1 << (7 - bit_count);
368                    }
369                    bit_count += 1;
370
371                    if bit_count == 8 {
372                        bit_stream.push(current_byte);
373                        current_byte = 0;
374                        bit_count = 0;
375                    }
376                }
377            }
378        }
379
380        // Add remaining bits
381        if bit_count > 0 {
382            bit_stream.push(current_byte);
383        }
384
385        // Serialize table and data
386        let mut result = Vec::new();
387        let table_data = self.serialize_huffman_table(&table)?;
388        result.extend(&(table_data.len() as u32).to_le_bytes());
389        result.extend(table_data);
390        result.push(bit_count); // Store remaining bits count
391        result.extend(bit_stream);
392
393        Ok(result)
394    }
395
396    /// Huffman encoding decompression
397    fn decompress_huffman(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
398        if compressed_data.len() < 5 {
399            return Ok(Vec::new());
400        }
401
402        let table_size = u32::from_le_bytes([
403            compressed_data[0],
404            compressed_data[1],
405            compressed_data[2],
406            compressed_data[3],
407        ]) as usize;
408
409        if compressed_data.len() < 4 + table_size + 1 {
410            return Err(SparseError::ComputationError(
411                "Invalid Huffman data".to_string(),
412            ));
413        }
414
415        let table_data = &compressed_data[4..4 + table_size];
416        let table = self.deserialize_huffman_table(table_data)?;
417
418        let remaining_bits = compressed_data[4 + table_size];
419        let bit_stream = &compressed_data[4 + table_size + 1..];
420
421        let mut decompressed = Vec::new();
422        let mut current_node = &table.decode_tree;
423
424        for (i, &byte) in bit_stream.iter().enumerate() {
425            let bit_limit = if i == bit_stream.len() - 1 && remaining_bits > 0 {
426                remaining_bits
427            } else {
428                8
429            };
430
431            for bit_pos in 0..bit_limit {
432                let bit = (byte >> (7 - bit_pos)) & 1 == 1;
433
434                current_node = match current_node {
435                    HuffmanNode::Internal { left, right } => {
436                        if bit {
437                            right
438                        } else {
439                            left
440                        }
441                    }
442                    HuffmanNode::Leaf { value } => {
443                        decompressed.push(*value);
444                        &table.decode_tree
445                    }
446                };
447
448                if let HuffmanNode::Leaf { value } = current_node {
449                    decompressed.push(*value);
450                    current_node = &table.decode_tree;
451                }
452            }
453        }
454
455        Ok(decompressed)
456    }
457
458    /// Simple LZ77 compression
459    fn compress_lz77(&self, data: &[u8]) -> SparseResult<Vec<u8>> {
460        const WINDOW_SIZE: usize = 4096;
461        const LOOKAHEAD_SIZE: usize = 256;
462
463        if data.is_empty() {
464            return Ok(Vec::new());
465        }
466
467        let mut compressed = Vec::new();
468        let mut pos = 0;
469
470        while pos < data.len() {
471            let mut best_length = 0;
472            let mut best_distance = 0;
473
474            // Search for matches in the sliding window
475            let window_start = pos.saturating_sub(WINDOW_SIZE);
476            let lookahead_end = (pos + LOOKAHEAD_SIZE).min(data.len());
477
478            for window_pos in window_start..pos {
479                let mut length = 0;
480                while window_pos + length < pos
481                    && pos + length < lookahead_end
482                    && data[window_pos + length] == data[pos + length]
483                {
484                    length += 1;
485                }
486
487                if length > best_length {
488                    best_length = length;
489                    best_distance = pos - window_pos;
490                }
491            }
492
493            if best_length >= 3 {
494                // Encode as (distance, length)
495                compressed.push(0xFF); // Marker for encoded sequence
496                compressed.extend(&(best_distance as u16).to_le_bytes());
497                compressed.push(best_length as u8);
498                pos += best_length;
499            } else {
500                // Encode as literal
501                compressed.push(data[pos]);
502                pos += 1;
503            }
504        }
505
506        Ok(compressed)
507    }
508
509    /// Simple LZ77 decompression
510    fn decompress_lz77(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
511        let mut decompressed = Vec::new();
512        let mut pos = 0;
513
514        while pos < compressed_data.len() {
515            if compressed_data[pos] == 0xFF && pos + 3 < compressed_data.len() {
516                // Decode encoded sequence
517                let distance =
518                    u16::from_le_bytes([compressed_data[pos + 1], compressed_data[pos + 2]])
519                        as usize;
520                let length = compressed_data[pos + 3] as usize;
521
522                if distance == 0 || distance > decompressed.len() {
523                    return Err(SparseError::ComputationError(
524                        "Invalid LZ77 distance".to_string(),
525                    ));
526                }
527
528                let start_pos = decompressed.len() - distance;
529                for i in 0..length {
530                    let byte = decompressed[start_pos + (i % distance)];
531                    decompressed.push(byte);
532                }
533
534                pos += 4;
535            } else {
536                // Literal byte
537                decompressed.push(compressed_data[pos]);
538                pos += 1;
539            }
540        }
541
542        Ok(decompressed)
543    }
544
545    /// Sparse-optimized compression
546    fn compress_sparse_optimized(
547        &mut self,
548        data: &[u8],
549        block_type: BlockType,
550    ) -> SparseResult<Vec<u8>> {
551        match block_type {
552            BlockType::Indices => {
553                // Use delta encoding for indices (usually sorted)
554                self.compress_delta(data)
555            }
556            BlockType::IndPtr => {
557                // Use delta encoding for indptr (monotonic)
558                self.compress_delta(data)
559            }
560            BlockType::Data => {
561                // Use RLE for data values (may have many zeros)
562                self.compress_rle(data)
563            }
564            _ => {
565                // Use adaptive compression for other types
566                self.compress_adaptive(data, block_type)
567            }
568        }
569    }
570
571    /// Sparse-optimized decompression
572    fn decompress_sparse_optimized(&self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
573        // Try different decompression methods and return the first successful one
574        // In practice, you'd store the method used in the compressed data header
575
576        if let Ok(result) = self.decompress_delta(compressed_data) {
577            Ok(result)
578        } else if let Ok(result) = self.decompress_rle(compressed_data) {
579            Ok(result)
580        } else {
581            Ok(compressed_data.to_vec())
582        }
583    }
584
585    /// Adaptive compression (tries multiple algorithms)
586    fn compress_adaptive(&mut self, data: &[u8], block_type: BlockType) -> SparseResult<Vec<u8>> {
587        let algorithms = vec![
588            CompressionAlgorithm::RLE,
589            CompressionAlgorithm::Delta,
590            CompressionAlgorithm::LZ77,
591        ];
592
593        let mut best_result = data.to_vec();
594        let mut best_algorithm = CompressionAlgorithm::None;
595
596        for algorithm in algorithms {
597            if let Ok(compressed) = match algorithm {
598                CompressionAlgorithm::RLE => self.compress_rle(data),
599                CompressionAlgorithm::Delta => self.compress_delta(data),
600                CompressionAlgorithm::LZ77 => self.compress_lz77(data),
601                _ => continue,
602            } {
603                if compressed.len() < best_result.len() {
604                    best_result = compressed;
605                    best_algorithm = algorithm;
606                }
607            }
608        }
609
610        // Prepend algorithm identifier
611        let mut result = vec![best_algorithm as u8];
612        result.extend(best_result);
613        Ok(result)
614    }
615
616    /// Adaptive decompression
617    fn decompress_adaptive(&mut self, compressed_data: &[u8]) -> SparseResult<Vec<u8>> {
618        if compressed_data.is_empty() {
619            return Ok(Vec::new());
620        }
621
622        let algorithm_id = compressed_data[0];
623        let data = &compressed_data[1..];
624
625        match algorithm_id {
626            0 => Ok(data.to_vec()),             // None
627            1 => self.decompress_rle(data),     // RLE
628            2 => self.decompress_delta(data),   // Delta
629            3 => self.decompress_huffman(data), // Huffman
630            4 => self.decompress_lz77(data),    // LZ77
631            _ => Err(SparseError::ComputationError(
632                "Unknown compression algorithm".to_string(),
633            )),
634        }
635    }
636
637    /// Build Huffman table from data
638    fn build_huffman_table(&mut self, data: &[u8]) -> HuffmanTable {
639        // Count frequencies
640        let mut frequencies = HashMap::new();
641        for &byte in data {
642            *frequencies.entry(byte).or_insert(0) += 1;
643        }
644
645        if frequencies.len() <= 1 {
646            // Handle edge case: single character
647            let byte = data[0];
648            let mut encode_table = HashMap::new();
649            encode_table.insert(byte, vec![false]);
650            return HuffmanTable {
651                encode_table,
652                decode_tree: HuffmanNode::Leaf { value: byte },
653            };
654        }
655
656        // Build Huffman tree
657        let mut heap = std::collections::BinaryHeap::new();
658        for (byte, freq) in frequencies {
659            heap.push(FrequencyNode {
660                frequency: freq,
661                node: HuffmanNode::Leaf { value: byte },
662            });
663        }
664
665        while heap.len() > 1 {
666            let right = heap.pop().expect("Operation failed");
667            let left = heap.pop().expect("Operation failed");
668
669            heap.push(FrequencyNode {
670                frequency: left.frequency + right.frequency,
671                node: HuffmanNode::Internal {
672                    left: Box::new(left.node),
673                    right: Box::new(right.node),
674                },
675            });
676        }
677
678        let root = heap.pop().expect("Operation failed").node;
679
680        // Build encoding table
681        let mut encode_table = HashMap::new();
682        self.build_codes(&root, Vec::new(), &mut encode_table);
683
684        HuffmanTable {
685            encode_table,
686            decode_tree: root,
687        }
688    }
689
690    /// Build Huffman codes recursively
691    fn build_codes(
692        &self,
693        node: &HuffmanNode,
694        code: Vec<bool>,
695        encode_table: &mut HashMap<u8, Vec<bool>>,
696    ) {
697        match node {
698            HuffmanNode::Leaf { value } => {
699                encode_table.insert(*value, code);
700            }
701            HuffmanNode::Internal { left, right } => {
702                let mut left_code = code.clone();
703                left_code.push(false);
704                self.build_codes(left, left_code, encode_table);
705
706                let mut right_code = code;
707                right_code.push(true);
708                self.build_codes(right, right_code, encode_table);
709            }
710        }
711    }
712
713    /// Serialize Huffman table to bytes.
714    ///
715    /// Binary format:
716    ///   [num_symbols: u32 LE]
717    ///   for each entry: [symbol: u8, code_len: u8, code_bits: u32 LE]
718    ///
719    /// `code_bits` stores the code MSB-first in the low `code_len` bits of a u32
720    /// (bit 0 = leftmost/root step, bit (code_len-1) = deepest step).
721    fn serialize_huffman_table(&self, table: &HuffmanTable) -> SparseResult<Vec<u8>> {
722        let num_symbols = table.encode_table.len() as u32;
723        let mut out = Vec::with_capacity(4 + (table.encode_table.len() * 6));
724
725        // Write num_symbols (4 bytes LE)
726        out.extend_from_slice(&num_symbols.to_le_bytes());
727
728        // Write each symbol's code
729        let mut entries: Vec<(u8, &Vec<bool>)> = table
730            .encode_table
731            .iter()
732            .map(|(&sym, bits)| (sym, bits))
733            .collect();
734        // Sort for deterministic output
735        entries.sort_by_key(|(sym, _)| *sym);
736
737        for (symbol, bits) in entries {
738            let code_len = bits.len() as u8;
739            if bits.len() > 32 {
740                return Err(SparseError::ComputationError(format!(
741                    "Huffman code for symbol {} exceeds 32 bits (len={})",
742                    symbol,
743                    bits.len()
744                )));
745            }
746            // Pack bits into u32: first bit (index 0) goes to the MSB position (bit code_len-1)
747            let mut code_bits: u32 = 0;
748            for (i, &bit) in bits.iter().enumerate() {
749                if bit {
750                    let shift = (bits.len() - 1 - i) as u32;
751                    code_bits |= 1u32 << shift;
752                }
753            }
754            out.push(symbol);
755            out.push(code_len);
756            out.extend_from_slice(&code_bits.to_le_bytes());
757        }
758
759        Ok(out)
760    }
761
762    /// Deserialize Huffman table from bytes produced by `serialize_huffman_table`.
763    ///
764    /// Reconstructs the encode table and rebuilds the decode tree by
765    /// inserting each `(symbol, code_len, code_bits)` tuple into a trie.
766    fn deserialize_huffman_table(&self, data: &[u8]) -> SparseResult<HuffmanTable> {
767        if data.len() < 4 {
768            return Err(SparseError::ComputationError(
769                "Huffman table data too short for header".to_string(),
770            ));
771        }
772
773        let num_symbols = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
774        let expected_len = 4 + num_symbols * 6;
775        if data.len() < expected_len {
776            return Err(SparseError::ComputationError(format!(
777                "Huffman table data too short: expected {} bytes, got {}",
778                expected_len,
779                data.len()
780            )));
781        }
782
783        let mut encode_table: HashMap<u8, Vec<bool>> = HashMap::with_capacity(num_symbols);
784
785        // Parse each entry and build encode table
786        for i in 0..num_symbols {
787            let base = 4 + i * 6;
788            let symbol = data[base];
789            let code_len = data[base + 1] as usize;
790            let code_bits = u32::from_le_bytes([
791                data[base + 2],
792                data[base + 3],
793                data[base + 4],
794                data[base + 5],
795            ]);
796
797            if code_len > 32 {
798                return Err(SparseError::ComputationError(format!(
799                    "code_len {} for symbol {} exceeds 32",
800                    code_len, symbol
801                )));
802            }
803
804            // Unpack bits (MSB of the packed u32 = first step from root)
805            let mut bits = Vec::with_capacity(code_len);
806            for j in 0..code_len {
807                let shift = (code_len - 1 - j) as u32;
808                bits.push((code_bits >> shift) & 1 == 1);
809            }
810            encode_table.insert(symbol, bits);
811        }
812
813        // Rebuild decode tree from encode table
814        // Use a recursive trie represented as a mutable HuffmanNode
815        fn insert_leaf(node: &mut HuffmanNode, bits: &[bool], symbol: u8) -> Result<(), String> {
816            if bits.is_empty() {
817                *node = HuffmanNode::Leaf { value: symbol };
818                return Ok(());
819            }
820            match node {
821                HuffmanNode::Leaf { .. } => Err(format!("Conflict at leaf for symbol {}", symbol)),
822                HuffmanNode::Internal { left, right } => {
823                    if bits[0] {
824                        insert_leaf(right, &bits[1..], symbol)
825                    } else {
826                        insert_leaf(left, &bits[1..], symbol)
827                    }
828                }
829            }
830        }
831
832        // Start with a dummy internal node; expand as needed
833        fn ensure_internal(node: &mut HuffmanNode) {
834            if matches!(node, HuffmanNode::Leaf { .. }) {
835                *node = HuffmanNode::Internal {
836                    left: Box::new(HuffmanNode::Leaf { value: 0 }),
837                    right: Box::new(HuffmanNode::Leaf { value: 0 }),
838                };
839            }
840        }
841
842        fn insert(node: &mut HuffmanNode, bits: &[bool], symbol: u8) -> Result<(), String> {
843            if bits.is_empty() {
844                *node = HuffmanNode::Leaf { value: symbol };
845                return Ok(());
846            }
847            ensure_internal(node);
848            match node {
849                HuffmanNode::Internal { left, right } => {
850                    if bits[0] {
851                        insert(right, &bits[1..], symbol)
852                    } else {
853                        insert(left, &bits[1..], symbol)
854                    }
855                }
856                HuffmanNode::Leaf { .. } => unreachable!(),
857            }
858        }
859
860        let mut decode_tree = HuffmanNode::Internal {
861            left: Box::new(HuffmanNode::Leaf { value: 0 }),
862            right: Box::new(HuffmanNode::Leaf { value: 0 }),
863        };
864
865        // Handle single-symbol edge case
866        if num_symbols == 1 {
867            let (&symbol, _) = encode_table
868                .iter()
869                .next()
870                .ok_or_else(|| SparseError::ComputationError("empty encode table".to_string()))?;
871            decode_tree = HuffmanNode::Leaf { value: symbol };
872        } else {
873            // Sort by code_len then symbol for deterministic insertion order
874            let mut sorted_entries: Vec<(u8, &Vec<bool>)> =
875                encode_table.iter().map(|(&s, b)| (s, b)).collect();
876            sorted_entries.sort_by(|a, b| a.1.len().cmp(&b.1.len()).then(a.0.cmp(&b.0)));
877
878            for (symbol, bits) in &sorted_entries {
879                insert(&mut decode_tree, bits, *symbol).map_err(|e| {
880                    SparseError::ComputationError(format!("Huffman tree build error: {}", e))
881                })?;
882            }
883        }
884
885        Ok(HuffmanTable {
886            encode_table,
887            decode_tree,
888        })
889    }
890
891    /// Update algorithm performance metrics
892    fn update_algorithm_metrics(
893        &mut self,
894        algorithm: CompressionAlgorithm,
895        time: f64,
896        original_size: usize,
897        compressed_size: usize,
898        success: bool,
899    ) {
900        let metrics = self
901            .algorithm_metrics
902            .entry(algorithm)
903            .or_insert_with(|| AlgorithmMetrics {
904                total_operations: 0,
905                total_compression_time: 0.0,
906                total_decompression_time: 0.0,
907                total_original_size: 0,
908                total_compressed_size: 0,
909                success_count: 0,
910            });
911
912        metrics.total_operations += 1;
913        metrics.total_compression_time += time;
914        metrics.total_original_size += original_size;
915        metrics.total_compressed_size += compressed_size;
916
917        if success {
918            metrics.success_count += 1;
919        }
920    }
921
922    /// Get algorithm performance metrics
923    pub fn get_algorithm_metrics(
924        &self,
925        algorithm: CompressionAlgorithm,
926    ) -> Option<&AlgorithmMetrics> {
927        self.algorithm_metrics.get(&algorithm)
928    }
929
930    /// Get best algorithm for given data characteristics
931    pub fn select_best_algorithm(
932        &self,
933        data_size: usize,
934        block_type: BlockType,
935    ) -> CompressionAlgorithm {
936        match block_type {
937            BlockType::Indices | BlockType::IndPtr if data_size > 1024 => {
938                CompressionAlgorithm::Delta
939            }
940            BlockType::Data if data_size > 4096 => CompressionAlgorithm::LZ77,
941            _ if data_size > 512 => CompressionAlgorithm::RLE,
942            _ => CompressionAlgorithm::None,
943        }
944    }
945}
946
947impl CompressionStrategy {
948    /// Create a new compression strategy
949    pub fn new(algorithm: CompressionAlgorithm, block_size: usize) -> Self {
950        Self {
951            algorithm,
952            block_size,
953            hierarchical: false,
954            predicted_ratio: algorithm.expected_compression_ratio(),
955            actual_ratio: 1.0,
956            compression_speed: algorithm.compression_speed(),
957            decompression_speed: algorithm.compression_speed() * 1.5, // Decompression usually faster
958        }
959    }
960
961    /// Update actual performance metrics
962    pub fn update_performance(
963        &mut self,
964        actual_ratio: f64,
965        compression_speed: f64,
966        decompression_speed: f64,
967    ) {
968        self.actual_ratio = actual_ratio;
969        self.compression_speed = compression_speed;
970        self.decompression_speed = decompression_speed;
971    }
972
973    /// Get efficiency score
974    pub fn efficiency_score(&self) -> f64 {
975        let ratio_score = (1.0 - self.actual_ratio).max(0.0);
976        let speed_score = (self.compression_speed / 10.0).min(1.0);
977        (ratio_score + speed_score) / 2.0
978    }
979}
980
981impl Default for CompressionStrategy {
982    fn default() -> Self {
983        Self::new(CompressionAlgorithm::Adaptive, 1024 * 1024)
984    }
985}
986
987impl Default for CompressionEngine {
988    fn default() -> Self {
989        Self::new()
990    }
991}