1use super::traits::{CompressionError, CompressionMetadata, CompressionStats, PatternCompressionStrategy, PatternInfo};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::time::Instant;
10
/// Serializable container that bundles a format version, a list of serialized
/// patterns, and a byte payload.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this type; the field descriptions below are inferred from names only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressedPackage {
    /// Format version of the package.
    pub version: u8,
    /// Patterns shipped alongside the payload (presumably the ones the payload
    /// references; confirm against the producer).
    pub patterns_used: Vec<SerializedPattern>,
    /// Raw payload bytes (presumably the pattern-encoded data; confirm).
    pub pattern_data: Vec<u8>,
}
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
24struct SerializedPattern {
25 id: String,
26 data: Vec<u8>,
27 marker: u8,
28}
29
/// Stateful pattern-substitution compressor.
///
/// The engine learns fixed-size byte patterns while compressing, replaces
/// them with two-byte `[marker, id]` tokens, and then runs the result through
/// the configured backend. The same engine instance (with its learned
/// pattern table) is consulted when decompressing.
#[derive(Debug, Clone)]
pub struct PatternEngine {
    /// Static configuration: pattern definitions, limits, and backend choice.
    config: PatternConfig,
    /// Learned patterns, keyed by the decimal string form of their numeric id.
    patterns: HashMap<String, Pattern>,
    /// Per-pattern usage counters, keyed like `patterns`.
    usage_stats: HashMap<String, PatternUsage>,
    /// Set to 1 by `new` and `reset`.
    /// NOTE(review): not read by any non-test code visible in this file;
    /// pattern ids are derived from a hash of the pattern bytes instead.
    next_pattern_id: u64,
    /// Aggregate compression statistics.
    stats: CompressionStats,
}
44
/// Configuration for a [`PatternEngine`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PatternConfig {
    /// Fixed-size pattern definitions, tried in order at every input position.
    pub fixed_patterns: Vec<FixedPatternConfig>,
    /// Variable-size pattern definitions.
    /// NOTE(review): not read by any code visible in this file.
    pub variable_patterns: Vec<VariablePatternConfig>,
    /// Intended global cap on the number of patterns.
    /// NOTE(review): not read by any code visible in this file; only the
    /// per-type `FixedPatternConfig::max_count` is enforced here.
    pub max_patterns: usize,
    /// Patterns whose usage count is below this value are dropped when the
    /// pattern table is optimized.
    pub min_usage_threshold: u64,
    /// When true, the pattern table is pruned at the end of a pattern pass
    /// whenever the engine's compression counter is a multiple of 100.
    pub auto_optimize: bool,
    /// General-purpose compressor applied after pattern substitution.
    pub backend: CompressionBackend,
}
61
/// Definition of one fixed-size pattern family.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixedPatternConfig {
    /// Family name; stored on each learned pattern as its `pattern_type`.
    pub name: String,
    /// Window length in bytes that is matched at each input position.
    pub size: usize,
    /// Byte written in front of the pattern id in the encoded stream, and
    /// used by the decoder to recognise a pattern token.
    pub marker: u8,
    /// Maximum number of learned patterns of this family.
    pub max_count: usize,
    /// When true, windows consisting only of zero bytes are never matched.
    pub skip_zeros: bool,
    /// Free-form description.
    /// NOTE(review): not read by any code visible in this file.
    pub description: String,
}
78
/// Definition of one variable-size pattern family.
///
/// NOTE(review): no code visible in this file reads these fields, so the
/// descriptions below are inferred from the field names only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariablePatternConfig {
    /// Family name.
    pub name: String,
    /// Presumably the smallest pattern length in bytes; confirm.
    pub min_size: usize,
    /// Presumably the largest pattern length in bytes; confirm.
    pub max_size: usize,
    /// Marker byte associated with this family.
    pub marker: u8,
    /// Strategy used to detect patterns of this family.
    pub detection: VariablePatternDetection,
    /// Free-form description.
    pub description: String,
}
95
/// Detection strategy for variable-size patterns.
///
/// NOTE(review): no code visible in this file matches on this enum; variant
/// meanings are inferred from their names and should be confirmed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VariablePatternDetection {
    /// Repetition-based detection, parameterised by a minimum repeat count.
    Repetition { min_repeats: usize },
    /// Prefix/suffix-based detection, parameterised by the two affix lengths.
    Affix { prefix_len: usize, suffix_len: usize },
    /// Custom detection with no parameters.
    Custom,
}
106
/// General-purpose compressor applied to the pattern-encoded stream.
///
/// Each real backend is gated behind a Cargo feature of the same name
/// (`deflate`, `lz4`, `zstd`); selecting a backend whose feature is disabled
/// yields a configuration error at compression/decompression time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CompressionBackend {
    /// DEFLATE via `flate2`; `level` is passed to `Compression::new`.
    Deflate { level: u32 },
    /// LZ4 via `lz4_flex`.
    /// NOTE(review): `acceleration` is not used by the code in this file.
    Lz4 { acceleration: i32 },
    /// Zstandard via `zstd`; `level` is passed to `zstd::bulk::compress`.
    Zstd { level: i32 },
    /// No backend: the pattern-encoded bytes are passed through unchanged.
    None,
}
119
/// A learned byte pattern stored in the engine's pattern table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pattern {
    /// Decimal string form of the numeric pattern id (also the map key).
    pub id: String,
    /// Name of the `FixedPatternConfig` family this pattern belongs to.
    pub pattern_type: String,
    /// Bytes the pattern expands to on decompression.
    pub data: Vec<u8>,
    /// Length of `data` at creation time.
    pub size: usize,
    /// Marker byte of the family that created this pattern.
    pub marker: u8,
    /// Creation time in seconds since the Unix epoch.
    pub created_at: u64,
}
136
/// Usage counters kept for each learned pattern.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PatternUsage {
    /// Number of times the pattern was emitted during compression.
    pub count: u64,
    /// Running sum of the `bytes_saved` amounts reported per use. The pattern
    /// pass reports the full pattern size, i.e. the two-byte token that
    /// replaces it is not subtracted.
    pub bytes_saved: u64,
    /// Time of the most recent use, in seconds since the Unix epoch.
    pub last_used: u64,
    /// `bytes_saved / count`, refreshed on every recorded use.
    pub avg_benefit: f64,
}
149
150impl PatternEngine {
151 pub fn new(config: PatternConfig) -> Self {
153 Self {
154 config,
155 patterns: HashMap::new(),
156 usage_stats: HashMap::new(),
157 next_pattern_id: 1,
158 stats: CompressionStats::new(),
159 }
160 }
161
162 pub fn compress(&mut self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
164 let start_time = Instant::now();
165
166 let pattern_compressed = self.apply_patterns(data)?;
168
169 let final_compressed = self.apply_backend_compression(&pattern_compressed)?;
171
172 let elapsed = start_time.elapsed();
174 self.stats.record_compression(data.len(), final_compressed.len(), elapsed.as_nanos() as u64);
175
176 Ok(final_compressed)
177 }
178
179 pub fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
181 let start_time = Instant::now();
182
183 let backend_decompressed = self.apply_backend_decompression(data)?;
185
186 let final_decompressed = self.reconstruct_patterns(&backend_decompressed)?;
188
189 let elapsed = start_time.elapsed();
191 let mut stats = self.stats.clone();
192 stats.record_decompression(elapsed.as_nanos() as u64);
193
194 Ok(final_decompressed)
195 }
196
197 fn apply_patterns(&mut self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
199 let mut result = Vec::new();
200 let mut pos = 0;
201
202 while pos < data.len() {
203 let mut pattern_found = false;
204
205 let fixed_patterns = self.config.fixed_patterns.clone();
207
208 for config in fixed_patterns {
210 if pos + config.size <= data.len() {
211 let slice = &data[pos..pos + config.size];
212
213 if config.skip_zeros && slice.iter().all(|&b| b == 0) {
215 continue;
216 }
217
218 if let Some(pattern_id) = self.find_existing_pattern(slice, &config.name) {
220 result.push(config.marker);
222 result.push(pattern_id as u8);
223 self.record_pattern_usage(&pattern_id.to_string(), config.size);
224 pos += config.size;
225 pattern_found = true;
226 break;
227 } else if self.should_create_pattern(&config.name, slice) {
228 let pattern_id = self.create_pattern(config.name.clone(), slice.to_vec(), config.marker)?;
230 result.push(config.marker);
231 result.push(pattern_id as u8);
232 self.record_pattern_usage(&pattern_id.to_string(), config.size);
233 pos += config.size;
234 pattern_found = true;
235 break;
236 }
237 }
238 }
239
240 if !pattern_found {
241 result.push(data[pos]);
243 pos += 1;
244 }
245 }
246
247 if self.config.auto_optimize && self.stats.compressions % 100 == 0 {
249 self.optimize_patterns_internal()?;
250 }
251
252 Ok(result)
253 }
254
255 fn reconstruct_patterns(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
257 let mut result = Vec::new();
258 let mut pos = 0;
259
260 while pos < data.len() {
261 let byte = data[pos];
262
263 if let Some(config) = self.config.fixed_patterns.iter().find(|c| c.marker == byte) {
265 if pos + 1 < data.len() {
266 let pattern_id = data[pos + 1] as u64;
267
268 let pattern_id_str = pattern_id.to_string();
270 if let Some(pattern) = self.patterns.get(&pattern_id_str) {
271 result.extend_from_slice(&pattern.data);
272 pos += 2;
273 continue;
274 } else {
275 if let Some(reconstructed) = self.reconstruct_deterministic_pattern(&config, pattern_id) {
277 result.extend_from_slice(&reconstructed);
278 pos += 2;
279 continue;
280 }
281 }
282 }
283 }
284
285 result.push(byte);
287 pos += 1;
288 }
289
290 Ok(result)
291 }
292
293 fn apply_backend_compression(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
295 match &self.config.backend {
296 CompressionBackend::Deflate { level } => {
297 #[cfg(feature = "deflate")]
298 {
299 use flate2::{write::DeflateEncoder, Compression};
300 use std::io::Write;
301
302 let mut encoder = DeflateEncoder::new(Vec::new(), Compression::new(*level));
303 encoder.write_all(data).map_err(CompressionError::Io)?;
304 encoder.finish().map_err(CompressionError::Io)
305 }
306 #[cfg(not(feature = "deflate"))]
307 {
308 Err(CompressionError::Configuration {
309 message: "DEFLATE backend not available, enable 'deflate' feature".to_string(),
310 })
311 }
312 }
313 CompressionBackend::Lz4 { acceleration } => {
314 #[cfg(feature = "lz4")]
315 {
316 use lz4_flex::compress_prepend_size;
317 Ok(compress_prepend_size(data))
318 }
319 #[cfg(not(feature = "lz4"))]
320 {
321 Err(CompressionError::Configuration {
322 message: "LZ4 backend not available, enable 'lz4' feature".to_string(),
323 })
324 }
325 }
326 CompressionBackend::Zstd { level } => {
327 #[cfg(feature = "zstd")]
328 {
329 zstd::bulk::compress(data, *level).map_err(|e| CompressionError::Internal {
330 message: format!("Zstd compression failed: {}", e),
331 })
332 }
333 #[cfg(not(feature = "zstd"))]
334 {
335 Err(CompressionError::Configuration {
336 message: "Zstd backend not available, enable 'zstd' feature".to_string(),
337 })
338 }
339 }
340 CompressionBackend::None => Ok(data.to_vec()),
341 }
342 }
343
344 fn apply_backend_decompression(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
346 match &self.config.backend {
347 CompressionBackend::Deflate { .. } => {
348 #[cfg(feature = "deflate")]
349 {
350 use flate2::read::DeflateDecoder;
351 use std::io::Read;
352
353 let mut decoder = DeflateDecoder::new(data);
354 let mut decompressed = Vec::new();
355 decoder.read_to_end(&mut decompressed).map_err(CompressionError::Io)?;
356 Ok(decompressed)
357 }
358 #[cfg(not(feature = "deflate"))]
359 {
360 Err(CompressionError::Configuration {
361 message: "DEFLATE backend not available, enable 'deflate' feature".to_string(),
362 })
363 }
364 }
365 CompressionBackend::Lz4 { .. } => {
366 #[cfg(feature = "lz4")]
367 {
368 use lz4_flex::decompress_size_prepended;
369 decompress_size_prepended(data).map_err(|e| CompressionError::Internal {
370 message: format!("LZ4 decompression failed: {:?}", e),
371 })
372 }
373 #[cfg(not(feature = "lz4"))]
374 {
375 Err(CompressionError::Configuration {
376 message: "LZ4 backend not available, enable 'lz4' feature".to_string(),
377 })
378 }
379 }
380 CompressionBackend::Zstd { .. } => {
381 #[cfg(feature = "zstd")]
382 {
383 zstd::bulk::decompress(data, 1024 * 1024).map_err(|e| CompressionError::Internal {
384 message: format!("Zstd decompression failed: {}", e),
385 })
386 }
387 #[cfg(not(feature = "zstd"))]
388 {
389 Err(CompressionError::Configuration {
390 message: "Zstd backend not available, enable 'zstd' feature".to_string(),
391 })
392 }
393 }
394 CompressionBackend::None => Ok(data.to_vec()),
395 }
396 }
397
398 fn find_existing_pattern(&self, data: &[u8], pattern_type: &str) -> Option<u64> {
400 self.patterns
401 .values()
402 .find(|p| p.pattern_type == pattern_type && p.data == data)
403 .and_then(|p| p.id.parse().ok())
404 }
405
406 fn should_create_pattern(&self, pattern_type: &str, data: &[u8]) -> bool {
408 let type_count = self.patterns.values().filter(|p| p.pattern_type == pattern_type).count();
410
411 if let Some(config) = self.config.fixed_patterns.iter().find(|c| c.name == pattern_type) {
413 type_count < config.max_count && (!config.skip_zeros || !data.iter().all(|&b| b == 0))
414 } else {
415 false
416 }
417 }
418
419 fn create_pattern(&mut self, pattern_type: String, data: Vec<u8>, marker: u8) -> Result<u64, CompressionError> {
421 let pattern_id = self.hash_data(&data) % 250; log::debug!("Creating pattern: size={}, id={}, first_bytes={:02X?}",
424 data.len(), pattern_id, &data[0..data.len().min(10)]);
425
426 let pattern = Pattern {
427 id: pattern_id.to_string(),
428 pattern_type,
429 size: data.len(),
430 data,
431 marker,
432 created_at: std::time::SystemTime::now()
433 .duration_since(std::time::UNIX_EPOCH)
434 .unwrap()
435 .as_secs(),
436 };
437
438 self.patterns.insert(pattern_id.to_string(), pattern);
439 self.usage_stats.insert(pattern_id.to_string(), PatternUsage {
440 count: 0,
441 bytes_saved: 0,
442 last_used: 0,
443 avg_benefit: 0.0,
444 });
445
446 Ok(pattern_id)
447 }
448
449 fn record_pattern_usage(&mut self, pattern_id: &str, bytes_saved: usize) {
451 if let Some(usage) = self.usage_stats.get_mut(pattern_id) {
452 usage.count += 1;
453 usage.bytes_saved += bytes_saved as u64;
454 usage.last_used = std::time::SystemTime::now()
455 .duration_since(std::time::UNIX_EPOCH)
456 .unwrap()
457 .as_secs();
458 usage.avg_benefit = usage.bytes_saved as f64 / usage.count as f64;
459 }
460 }
461
462 fn optimize_patterns_internal(&mut self) -> Result<(), CompressionError> {
464 let mut patterns_to_remove = Vec::new();
465
466 for (pattern_id, usage) in &self.usage_stats {
467 if usage.count < self.config.min_usage_threshold {
468 patterns_to_remove.push(pattern_id.clone());
469 }
470 }
471
472 for pattern_id in patterns_to_remove {
473 self.patterns.remove(&pattern_id);
474 self.usage_stats.remove(&pattern_id);
475 }
476
477 Ok(())
478 }
479
480 pub fn pattern_info(&self) -> HashMap<String, PatternInfo> {
482 let mut info = HashMap::new();
483
484 for (pattern_id, pattern) in &self.patterns {
485 let usage = self.usage_stats.get(pattern_id).cloned().unwrap_or_default();
486
487 info.insert(pattern_id.clone(), PatternInfo {
488 id: pattern_id.clone(),
489 size: pattern.size,
490 usage_count: usage.count,
491 bytes_saved: usage.bytes_saved,
492 description: format!("{} pattern ({})", pattern.pattern_type, pattern.size),
493 });
494 }
495
496 info
497 }
498
499 pub fn stats(&self) -> CompressionStats {
501 self.stats.clone()
502 }
503
504 pub fn metadata(&self) -> CompressionMetadata {
506 CompressionMetadata {
507 name: "PatternEngine".to_string(),
508 version: "1.0.0".to_string(),
509 description: "Generic pattern-based compression for blockchain data".to_string(),
510 deterministic: true,
511 memory_usage: std::mem::size_of_val(self) +
512 self.patterns.iter().map(|(k, v)| k.len() + v.data.len()).sum::<usize>(),
513 domains: self.config.fixed_patterns.iter().map(|p| p.name.clone()).collect(),
514 }
515 }
516
517 pub fn reset(&mut self) {
519 self.patterns.clear();
520 self.usage_stats.clear();
521 self.next_pattern_id = 1;
522 self.stats = CompressionStats::new();
523 }
524
525 pub fn pattern_count(&self) -> usize {
527 self.patterns.len()
528 }
529
530 pub fn memory_usage(&self) -> usize {
532 std::mem::size_of_val(self) +
533 self.patterns.iter().map(|(k, v)| k.len() + v.data.len()).sum::<usize>()
534 }
535
536 fn hash_data(&self, data: &[u8]) -> u64 {
538 let mut hash = 5381u64; for &byte in data.iter().take(32) { hash = hash.wrapping_mul(33).wrapping_add(byte as u64);
541 }
542
543 if data.len() > 1 && data.iter().all(|&b| b == data[0]) {
545 hash = hash.wrapping_add((data[0] as u64) * 1000000);
547 }
548
549 hash
550 }
551
552 fn serialize_active_patterns(&self) -> Result<Vec<SerializedPattern>, CompressionError> {
554 let mut serialized = Vec::new();
555 for (id, pattern) in &self.patterns {
556 serialized.push(SerializedPattern {
557 id: id.clone(),
558 data: pattern.data.clone(),
559 marker: pattern.marker,
560 });
561 }
562 Ok(serialized)
563 }
564
565 fn create_pattern_dictionary(&self, patterns: &[SerializedPattern]) -> Result<HashMap<String, SerializedPattern>, CompressionError> {
567 let mut dict = HashMap::new();
568 for pattern in patterns {
569 dict.insert(pattern.id.clone(), pattern.clone());
570 }
571 Ok(dict)
572 }
573
574 fn reconstruct_patterns_with_dict(&self, data: &[u8], patterns: &HashMap<String, SerializedPattern>) -> Result<Vec<u8>, CompressionError> {
576 let mut result = Vec::new();
577 let mut pos = 0;
578
579 while pos < data.len() {
580 let byte = data[pos];
581
582 if let Some(config) = self.config.fixed_patterns.iter().find(|c| c.marker == byte) {
584 if pos + 1 < data.len() {
585 let pattern_id = data[pos + 1] as u64;
586
587 let pattern_id_str = pattern_id.to_string();
589 if let Some(pattern) = patterns.get(&pattern_id_str) {
590 result.extend_from_slice(&pattern.data);
591 pos += 2;
592 continue;
593 }
594 }
595 }
596
597 result.push(byte);
599 pos += 1;
600 }
601
602 Ok(result)
603 }
604
605 fn reconstruct_deterministic_pattern(&self, config: &FixedPatternConfig, pattern_id: u64) -> Option<Vec<u8>> {
607 match config.size {
609 64 => {
610 match pattern_id {
612 249 => Some(vec![0x00; 64]), 241 => Some(vec![0x01; 64]), 117 => Some(vec![0x02; 64]), 109 => Some(vec![0x03; 64]), 101 => Some(vec![0x04; 64]), 227 => Some(vec![0x05; 64]), 219 => Some(vec![0x06; 64]), 95 => Some(vec![0x07; 64]), 87 => Some(vec![0x08; 64]), 213 => Some(vec![0x09; 64]), 205 => Some(vec![0x0A; 64]), 197 => Some(vec![0x0B; 64]), 73 => Some(vec![0x0C; 64]), 65 => Some(vec![0x0D; 64]), 191 => Some(vec![0x0E; 64]), 135 => Some(vec![0xAA; 64]), _ => {
632 for byte_val in 0u8..=255u8 {
634 let test_data = vec![byte_val; 64];
635 let test_hash = self.hash_data(&test_data) % 250;
636 if test_hash == pattern_id {
637 return Some(test_data);
638 }
639 }
640
641 let byte_val = (pattern_id % 256) as u8;
644 Some(vec![byte_val; 64])
645 }
646 }
647 }
648 32 => {
649 match pattern_id {
651 249 => Some(vec![0x00; 32]), 241 => Some(vec![0x01; 32]), 117 => Some(vec![0x02; 32]), 109 => Some(vec![0x03; 32]), 101 => Some(vec![0x04; 32]), 227 => Some(vec![0x05; 32]), 219 => Some(vec![0x06; 32]), 95 => Some(vec![0x07; 32]), 87 => Some(vec![0x08; 32]), 213 => Some(vec![0x09; 32]), 187 => Some(vec![0xBB; 32]), _ => {
666 for byte_val in 0u8..=255u8 {
668 let test_data = vec![byte_val; 32];
669 let test_hash = self.hash_data(&test_data) % 250;
670 if test_hash == pattern_id {
671 return Some(test_data);
672 }
673 }
674
675 let byte_val = (pattern_id % 256) as u8;
678 Some(vec![byte_val; 32])
679 }
680 }
681 }
682 8 => {
683 for amount_base in 1..=100u64 {
685 let amount = amount_base * 1_000_000;
686 let test_data = amount.to_le_bytes().to_vec();
687 let test_hash = self.hash_data(&test_data) % 250;
688 if test_hash == pattern_id {
689 return Some(test_data);
690 }
691 }
692 let amount_base = (pattern_id % 50) + 1;
694 Some((amount_base * 1_000_000).to_le_bytes().to_vec())
695 }
696 _ => None,
697 }
698 }
699}
700
701impl Default for PatternUsage {
702 fn default() -> Self {
703 Self {
704 count: 0,
705 bytes_saved: 0,
706 last_used: 0,
707 avg_benefit: 0.0,
708 }
709 }
710}
711
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a configuration with a single 4-byte fixed pattern family
    /// (marker `0xFF`, at most 10 patterns), no variable patterns, no
    /// auto-optimisation, and no backend compression.
    fn single_family_config(name: &str, skip_zeros: bool, description: &str) -> PatternConfig {
        let family = FixedPatternConfig {
            name: name.to_string(),
            size: 4,
            marker: 0xFF,
            max_count: 10,
            skip_zeros,
            description: description.to_string(),
        };

        PatternConfig {
            fixed_patterns: vec![family],
            variable_patterns: vec![],
            max_patterns: 100,
            min_usage_threshold: 1,
            auto_optimize: false,
            backend: CompressionBackend::None,
        }
    }

    /// A new engine starts with no patterns and with the id counter at 1.
    #[test]
    fn test_pattern_engine_creation() {
        let engine = PatternEngine::new(single_family_config("test_pattern", true, "Test pattern"));

        assert_eq!(engine.patterns.len(), 0);
        assert_eq!(engine.next_pattern_id, 1);
    }

    /// Input made of whole 4-byte windows shrinks and round-trips through
    /// the same engine.
    #[test]
    fn test_basic_compression() {
        let mut engine = PatternEngine::new(single_family_config("four_byte", false, "Four byte pattern"));

        let data = vec![1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8];

        let compressed = engine.compress(&data).unwrap();
        let decompressed = engine.decompress(&compressed).unwrap();

        assert!(compressed.len() < data.len());
        assert_eq!(decompressed, data);
    }
}