blockchain_compression/algorithms/
multi_pass.rs1use crate::core::traits::{
6 CompressionStrategy, PipelineCompressionStrategy,
7 CompressionError, CompressionMetadata, CompressionStats
8};
9use crate::algorithms::enhanced_ctw::EnhancedCTW;
10use std::collections::HashMap;
11
12pub struct MultiPassCompressor {
14 stages: Vec<EnhancedCTW>,
16
17 enabled_stages: Vec<bool>,
19
20 max_passes: usize,
22
23 improvement_threshold: f32,
25
26 pass_strategies: Vec<PassStrategy>,
28
29 stats: CompressionStats,
31
32 stage_stats: Vec<CompressionStats>,
34}
35
36#[derive(Debug, Clone)]
37pub enum PassStrategy {
38 PatternReplacement,
40 ContextPrediction,
42 DictionaryCompression,
44 ArithmeticCoding,
46}
47
48impl MultiPassCompressor {
49 pub fn new() -> Self {
51 Self {
52 stages: Vec::new(),
53 enabled_stages: Vec::new(),
54 max_passes: 3,
55 improvement_threshold: 0.05, pass_strategies: vec![
57 PassStrategy::PatternReplacement,
58 PassStrategy::ContextPrediction,
59 PassStrategy::ArithmeticCoding,
60 ],
61 stats: CompressionStats::new(),
62 stage_stats: Vec::new(),
63 }
64 }
65
66 pub fn with_config(max_passes: usize, improvement_threshold: f32) -> Self {
68 let mut compressor = Self::new();
69 compressor.max_passes = max_passes;
70 compressor.improvement_threshold = improvement_threshold;
71 compressor
72 }
73
74 pub fn add_strategy(&mut self, strategy: PassStrategy) {
76 self.pass_strategies.push(strategy);
77 }
78
79 pub fn compress_with_passes(&mut self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
81 let mut current_data = data.to_vec();
82 let mut best_data = current_data.clone();
83 let mut best_ratio = 1.0f32;
84
85 log::info!("Starting multi-pass compression with {} passes", self.max_passes);
86
87 for pass in 0..self.max_passes {
88 let compressed = self.apply_pass_strategy(pass, ¤t_data)?;
90 let ratio = current_data.len() as f32 / compressed.len() as f32;
91
92 log::info!("Pass {}: {:.2}:1 compression", pass + 1, ratio);
93
94 if ratio > best_ratio * (1.0 + self.improvement_threshold) {
96 best_data = compressed.clone();
97 best_ratio = ratio;
98 current_data = compressed; } else {
100 log::info!(
101 "Stopping after {} passes (improvement < {:.1}%)",
102 pass + 1,
103 self.improvement_threshold * 100.0
104 );
105 break;
106 }
107 }
108
109 Ok(best_data)
110 }
111
112 fn apply_pass_strategy(&mut self, pass: usize, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
113 let strategy_index = pass % self.pass_strategies.len();
114 let strategy = &self.pass_strategies[strategy_index];
115
116 match strategy {
117 PassStrategy::PatternReplacement => self.apply_pattern_replacement(data),
118 PassStrategy::ContextPrediction => self.apply_context_prediction(data),
119 PassStrategy::DictionaryCompression => self.apply_dictionary_compression(data),
120 PassStrategy::ArithmeticCoding => self.apply_arithmetic_coding(data),
121 }
122 }
123
124 fn apply_pattern_replacement(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
125 let mut result = Vec::new();
127 let mut pos = 0;
128
129 while pos < data.len() {
130 let mut found_pattern = false;
132
133 if pos + 8 <= data.len() && data[pos..pos+4] == data[pos+4..pos+8] {
135 result.push(0xFF); result.push(4); result.extend_from_slice(&data[pos..pos+4]);
138 pos += 8;
139 found_pattern = true;
140 }
141
142 if !found_pattern {
143 result.push(data[pos]);
144 pos += 1;
145 }
146 }
147
148 Ok(result)
149 }
150
151 fn apply_context_prediction(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
152 #[cfg(feature = "deflate")]
154 {
155 use flate2::{Compression, write::DeflateEncoder};
156 use std::io::Write;
157
158 let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
159 encoder.write_all(data)?;
160 encoder.finish().map_err(CompressionError::from)
161 }
162
163 #[cfg(not(feature = "deflate"))]
164 {
165 Ok(data.to_vec())
167 }
168 }
169
170 fn apply_dictionary_compression(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
171 let mut dictionary = HashMap::new();
173 let mut dict_id = 0u8;
174
175 for i in 0..data.len().saturating_sub(4) {
177 let sequence = &data[i..i+4];
178 *dictionary.entry(sequence.to_vec()).or_insert(0usize) += 1;
179 }
180
181 let useful_dict: HashMap<Vec<u8>, u8> = dictionary
183 .into_iter()
184 .filter(|(_, count)| *count > 1)
185 .take(255) .map(|(seq, _)| {
187 let id = dict_id;
188 dict_id += 1;
189 (seq, id)
190 })
191 .collect();
192
193 let mut result = Vec::new();
195 let mut pos = 0;
196
197 while pos < data.len() {
198 let mut found = false;
199
200 for len in (4..=8).rev() {
202 if pos + len <= data.len() {
203 let sequence = &data[pos..pos+len];
204 if let Some(&dict_id) = useful_dict.get(sequence) {
205 result.push(0xFE); result.push(dict_id);
207 pos += len;
208 found = true;
209 break;
210 }
211 }
212 }
213
214 if !found {
215 result.push(data[pos]);
216 pos += 1;
217 }
218 }
219
220 Ok(result)
221 }
222
223 fn apply_arithmetic_coding(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
224 #[cfg(any(feature = "deflate", feature = "lz4", feature = "zstd"))]
226 {
227 self.apply_best_available_compression(data)
228 }
229
230 #[cfg(not(any(feature = "deflate", feature = "lz4", feature = "zstd")))]
231 {
232 Ok(data.to_vec())
234 }
235 }
236
237 #[cfg(any(feature = "deflate", feature = "lz4", feature = "zstd"))]
238 fn apply_best_available_compression(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
239 #[cfg(feature = "zstd")]
240 {
241 zstd::bulk::compress(data, 19)
242 .map_err(|e| CompressionError::Internal {
243 message: format!("Zstd compression failed: {}", e)
244 })
245 }
246
247 #[cfg(all(feature = "lz4", not(feature = "zstd")))]
248 {
249 Ok(lz4_flex::compress_prepend_size(data))
250 }
251
252 #[cfg(all(feature = "deflate", not(feature = "lz4"), not(feature = "zstd")))]
253 {
254 use flate2::{Compression, write::DeflateEncoder};
255 use std::io::Write;
256
257 let mut encoder = DeflateEncoder::new(Vec::new(), Compression::best());
258 encoder.write_all(data)?;
259 encoder.finish().map_err(CompressionError::from)
260 }
261 }
262}
263
264impl CompressionStrategy for MultiPassCompressor {
265 type Error = CompressionError;
266
267 fn compress(&mut self, data: &[u8]) -> Result<Vec<u8>, Self::Error> {
268 let start_time = std::time::Instant::now();
269
270 let compressed = self.compress_with_passes(data)?;
271
272 let compression_time = start_time.elapsed().as_nanos() as u64;
273 self.stats.record_compression(data.len(), compressed.len(), compression_time);
274
275 Ok(compressed)
276 }
277
278 fn decompress(&self, _data: &[u8]) -> Result<Vec<u8>, Self::Error> {
279 Err(CompressionError::Internal {
282 message: "Multi-pass decompression not yet implemented".to_string()
283 })
284 }
285
286 fn metadata(&self) -> CompressionMetadata {
287 CompressionMetadata {
288 name: "Multi-Pass Compressor".to_string(),
289 version: "1.0.0".to_string(),
290 description: "Multi-pass compression with iterative improvement".to_string(),
291 deterministic: false, memory_usage: std::mem::size_of::<Self>(),
293 domains: vec!["general".to_string(), "blockchain".to_string()],
294 }
295 }
296
297 fn stats(&self) -> CompressionStats {
298 self.stats.clone()
299 }
300
301 fn reset(&mut self) {
302 self.stages.clear();
303 self.enabled_stages.clear();
304 self.stats = CompressionStats::new();
305 self.stage_stats.clear();
306 }
307}
308
309impl PipelineCompressionStrategy for MultiPassCompressor {
310 type Stage = EnhancedCTW; fn add_stage(&mut self, stage: Self::Stage) -> Result<(), Self::Error> {
313 self.stages.push(stage);
314 self.enabled_stages.push(true);
315 self.stage_stats.push(CompressionStats::new());
316 Ok(())
317 }
318
319 fn remove_stage(&mut self, index: usize) -> Result<Self::Stage, Self::Error> {
320 if index >= self.stages.len() {
321 return Err(CompressionError::Pipeline {
322 stage: index,
323 message: "Stage index out of bounds".to_string(),
324 });
325 }
326
327 self.enabled_stages.remove(index);
328 self.stage_stats.remove(index);
329 Ok(self.stages.remove(index))
330 }
331
332 fn stage_count(&self) -> usize {
333 self.stages.len()
334 }
335
336 fn stage_stats(&self) -> Vec<CompressionStats> {
337 self.stage_stats.clone()
338 }
339
340 fn set_stage_enabled(&mut self, index: usize, enabled: bool) -> Result<(), Self::Error> {
341 if index >= self.enabled_stages.len() {
342 return Err(CompressionError::Pipeline {
343 stage: index,
344 message: "Stage index out of bounds".to_string(),
345 });
346 }
347
348 self.enabled_stages[index] = enabled;
349 Ok(())
350 }
351}
352
353impl Default for MultiPassCompressor {
354 fn default() -> Self {
355 Self::new()
356 }
357}
358
359#[cfg(test)]
360mod tests {
361 use super::*;
362
363 #[test]
364 fn test_multi_pass_basic() {
365 let mut compressor = MultiPassCompressor::new();
366 let test_data = b"AAAA BBBB CCCC AAAA BBBB CCCC".repeat(10);
367
368 let compressed = compressor.compress(&test_data).unwrap();
369
370 assert!(compressed.len() < test_data.len());
372 }
373
374 #[test]
375 fn test_pipeline_operations() {
376 let mut compressor = MultiPassCompressor::new();
377
378 assert_eq!(compressor.stage_count(), 0);
379
380 }
383}