Skip to main content

blockchain_compression/algorithms/
multi_pass.rs

1//! Multi-pass compression for iterative improvement
2//!
3//! Applies multiple compression strategies in sequence to achieve maximum compression.
4
5use crate::core::traits::{
6    CompressionStrategy, PipelineCompressionStrategy,
7    CompressionError, CompressionMetadata, CompressionStats
8};
9use crate::algorithms::enhanced_ctw::EnhancedCTW;
10use std::collections::HashMap;
11
12/// Multi-pass compression engine that applies multiple strategies
13pub struct MultiPassCompressor {
14    /// Individual compression stages
15    stages: Vec<EnhancedCTW>,
16
17    /// Stage enablement flags
18    enabled_stages: Vec<bool>,
19
20    /// Number of compression passes
21    max_passes: usize,
22
23    /// Improvement threshold to continue passes
24    improvement_threshold: f32,
25
26    /// Pass-specific strategies
27    pass_strategies: Vec<PassStrategy>,
28
29    /// Performance statistics
30    stats: CompressionStats,
31
32    /// Stage-specific statistics
33    stage_stats: Vec<CompressionStats>,
34}
35
36#[derive(Debug, Clone)]
37pub enum PassStrategy {
38    /// Pattern replacement pass
39    PatternReplacement,
40    /// Context prediction pass
41    ContextPrediction,
42    /// Dictionary compression pass
43    DictionaryCompression,
44    /// Arithmetic coding pass
45    ArithmeticCoding,
46}
47
48impl MultiPassCompressor {
49    /// Create a new multi-pass compressor
50    pub fn new() -> Self {
51        Self {
52            stages: Vec::new(),
53            enabled_stages: Vec::new(),
54            max_passes: 3,
55            improvement_threshold: 0.05, // 5% improvement to continue
56            pass_strategies: vec![
57                PassStrategy::PatternReplacement,
58                PassStrategy::ContextPrediction,
59                PassStrategy::ArithmeticCoding,
60            ],
61            stats: CompressionStats::new(),
62            stage_stats: Vec::new(),
63        }
64    }
65
66    /// Create with custom configuration
67    pub fn with_config(max_passes: usize, improvement_threshold: f32) -> Self {
68        let mut compressor = Self::new();
69        compressor.max_passes = max_passes;
70        compressor.improvement_threshold = improvement_threshold;
71        compressor
72    }
73
74    /// Add a compression strategy to the pass
75    pub fn add_strategy(&mut self, strategy: PassStrategy) {
76        self.pass_strategies.push(strategy);
77    }
78
79    /// Apply multi-pass compression
80    pub fn compress_with_passes(&mut self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
81        let mut current_data = data.to_vec();
82        let mut best_data = current_data.clone();
83        let mut best_ratio = 1.0f32;
84
85        log::info!("Starting multi-pass compression with {} passes", self.max_passes);
86
87        for pass in 0..self.max_passes {
88            // Apply compression strategy for this pass
89            let compressed = self.apply_pass_strategy(pass, &current_data)?;
90            let ratio = current_data.len() as f32 / compressed.len() as f32;
91
92            log::info!("Pass {}: {:.2}:1 compression", pass + 1, ratio);
93
94            // Check if this pass improved compression
95            if ratio > best_ratio * (1.0 + self.improvement_threshold) {
96                best_data = compressed.clone();
97                best_ratio = ratio;
98                current_data = compressed; // Use for next pass
99            } else {
100                log::info!(
101                    "Stopping after {} passes (improvement < {:.1}%)",
102                    pass + 1,
103                    self.improvement_threshold * 100.0
104                );
105                break;
106            }
107        }
108
109        Ok(best_data)
110    }
111
112    fn apply_pass_strategy(&mut self, pass: usize, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
113        let strategy_index = pass % self.pass_strategies.len();
114        let strategy = &self.pass_strategies[strategy_index];
115
116        match strategy {
117            PassStrategy::PatternReplacement => self.apply_pattern_replacement(data),
118            PassStrategy::ContextPrediction => self.apply_context_prediction(data),
119            PassStrategy::DictionaryCompression => self.apply_dictionary_compression(data),
120            PassStrategy::ArithmeticCoding => self.apply_arithmetic_coding(data),
121        }
122    }
123
124    fn apply_pattern_replacement(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
125        // Simple pattern replacement for common sequences
126        let mut result = Vec::new();
127        let mut pos = 0;
128
129        while pos < data.len() {
130            // Look for repeating patterns
131            let mut found_pattern = false;
132
133            // Check for 4-byte patterns
134            if pos + 8 <= data.len() && data[pos..pos+4] == data[pos+4..pos+8] {
135                result.push(0xFF); // Pattern marker
136                result.push(4); // Pattern length
137                result.extend_from_slice(&data[pos..pos+4]);
138                pos += 8;
139                found_pattern = true;
140            }
141
142            if !found_pattern {
143                result.push(data[pos]);
144                pos += 1;
145            }
146        }
147
148        Ok(result)
149    }
150
151    fn apply_context_prediction(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
152        // Use Enhanced CTW for context prediction
153        #[cfg(feature = "deflate")]
154        {
155            use flate2::{Compression, write::DeflateEncoder};
156            use std::io::Write;
157
158            let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
159            encoder.write_all(data)?;
160            encoder.finish().map_err(CompressionError::from)
161        }
162
163        #[cfg(not(feature = "deflate"))]
164        {
165            // Fallback to simple compression
166            Ok(data.to_vec())
167        }
168    }
169
170    fn apply_dictionary_compression(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
171        // Build a simple dictionary of common sequences
172        let mut dictionary = HashMap::new();
173        let mut dict_id = 0u8;
174
175        // Find common 4-byte sequences
176        for i in 0..data.len().saturating_sub(4) {
177            let sequence = &data[i..i+4];
178            *dictionary.entry(sequence.to_vec()).or_insert(0usize) += 1;
179        }
180
181        // Keep only sequences that appear more than once
182        let useful_dict: HashMap<Vec<u8>, u8> = dictionary
183            .into_iter()
184            .filter(|(_, count)| *count > 1)
185            .take(255) // Limit dictionary size
186            .map(|(seq, _)| {
187                let id = dict_id;
188                dict_id += 1;
189                (seq, id)
190            })
191            .collect();
192
193        // Apply dictionary compression
194        let mut result = Vec::new();
195        let mut pos = 0;
196
197        while pos < data.len() {
198            let mut found = false;
199
200            // Try to match dictionary entries
201            for len in (4..=8).rev() {
202                if pos + len <= data.len() {
203                    let sequence = &data[pos..pos+len];
204                    if let Some(&dict_id) = useful_dict.get(sequence) {
205                        result.push(0xFE); // Dictionary marker
206                        result.push(dict_id);
207                        pos += len;
208                        found = true;
209                        break;
210                    }
211                }
212            }
213
214            if !found {
215                result.push(data[pos]);
216                pos += 1;
217            }
218        }
219
220        Ok(result)
221    }
222
223    fn apply_arithmetic_coding(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
224        // Simple arithmetic coding simulation with standard compression
225        #[cfg(any(feature = "deflate", feature = "lz4", feature = "zstd"))]
226        {
227            self.apply_best_available_compression(data)
228        }
229
230        #[cfg(not(any(feature = "deflate", feature = "lz4", feature = "zstd")))]
231        {
232            // No compression backend available
233            Ok(data.to_vec())
234        }
235    }
236
237    #[cfg(any(feature = "deflate", feature = "lz4", feature = "zstd"))]
238    fn apply_best_available_compression(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
239        #[cfg(feature = "zstd")]
240        {
241            zstd::bulk::compress(data, 19)
242                .map_err(|e| CompressionError::Internal {
243                    message: format!("Zstd compression failed: {}", e)
244                })
245        }
246
247        #[cfg(all(feature = "lz4", not(feature = "zstd")))]
248        {
249            Ok(lz4_flex::compress_prepend_size(data))
250        }
251
252        #[cfg(all(feature = "deflate", not(feature = "lz4"), not(feature = "zstd")))]
253        {
254            use flate2::{Compression, write::DeflateEncoder};
255            use std::io::Write;
256
257            let mut encoder = DeflateEncoder::new(Vec::new(), Compression::best());
258            encoder.write_all(data)?;
259            encoder.finish().map_err(CompressionError::from)
260        }
261    }
262}
263
264impl CompressionStrategy for MultiPassCompressor {
265    type Error = CompressionError;
266
267    fn compress(&mut self, data: &[u8]) -> Result<Vec<u8>, Self::Error> {
268        let start_time = std::time::Instant::now();
269
270        let compressed = self.compress_with_passes(data)?;
271
272        let compression_time = start_time.elapsed().as_nanos() as u64;
273        self.stats.record_compression(data.len(), compressed.len(), compression_time);
274
275        Ok(compressed)
276    }
277
278    fn decompress(&self, _data: &[u8]) -> Result<Vec<u8>, Self::Error> {
279        // Multi-pass decompression would need to reverse each pass
280        // For now, this is left as an exercise for complete implementation
281        Err(CompressionError::Internal {
282            message: "Multi-pass decompression not yet implemented".to_string()
283        })
284    }
285
286    fn metadata(&self) -> CompressionMetadata {
287        CompressionMetadata {
288            name: "Multi-Pass Compressor".to_string(),
289            version: "1.0.0".to_string(),
290            description: "Multi-pass compression with iterative improvement".to_string(),
291            deterministic: false, // Due to adaptive behavior
292            memory_usage: std::mem::size_of::<Self>(),
293            domains: vec!["general".to_string(), "blockchain".to_string()],
294        }
295    }
296
297    fn stats(&self) -> CompressionStats {
298        self.stats.clone()
299    }
300
301    fn reset(&mut self) {
302        self.stages.clear();
303        self.enabled_stages.clear();
304        self.stats = CompressionStats::new();
305        self.stage_stats.clear();
306    }
307}
308
309impl PipelineCompressionStrategy for MultiPassCompressor {
310    type Stage = EnhancedCTW; // Simplified for now - in a full implementation, this would be a trait object
311
312    fn add_stage(&mut self, stage: Self::Stage) -> Result<(), Self::Error> {
313        self.stages.push(stage);
314        self.enabled_stages.push(true);
315        self.stage_stats.push(CompressionStats::new());
316        Ok(())
317    }
318
319    fn remove_stage(&mut self, index: usize) -> Result<Self::Stage, Self::Error> {
320        if index >= self.stages.len() {
321            return Err(CompressionError::Pipeline {
322                stage: index,
323                message: "Stage index out of bounds".to_string(),
324            });
325        }
326
327        self.enabled_stages.remove(index);
328        self.stage_stats.remove(index);
329        Ok(self.stages.remove(index))
330    }
331
332    fn stage_count(&self) -> usize {
333        self.stages.len()
334    }
335
336    fn stage_stats(&self) -> Vec<CompressionStats> {
337        self.stage_stats.clone()
338    }
339
340    fn set_stage_enabled(&mut self, index: usize, enabled: bool) -> Result<(), Self::Error> {
341        if index >= self.enabled_stages.len() {
342            return Err(CompressionError::Pipeline {
343                stage: index,
344                message: "Stage index out of bounds".to_string(),
345            });
346        }
347
348        self.enabled_stages[index] = enabled;
349        Ok(())
350    }
351}
352
353impl Default for MultiPassCompressor {
354    fn default() -> Self {
355        Self::new()
356    }
357}
358
359#[cfg(test)]
360mod tests {
361    use super::*;
362
363    #[test]
364    fn test_multi_pass_basic() {
365        let mut compressor = MultiPassCompressor::new();
366        let test_data = b"AAAA BBBB CCCC AAAA BBBB CCCC".repeat(10);
367
368        let compressed = compressor.compress(&test_data).unwrap();
369
370        // Should achieve some compression on repetitive data
371        assert!(compressed.len() < test_data.len());
372    }
373
374    #[test]
375    fn test_pipeline_operations() {
376        let mut compressor = MultiPassCompressor::new();
377
378        assert_eq!(compressor.stage_count(), 0);
379
380        // Pipeline operations would be tested with actual stages
381        // This is a placeholder for the interface
382    }
383}