// blockchain_compression/algorithms/enhanced_ctw.rs

//! Enhanced Context Tree Weighting compression algorithm
//!
//! A high-performance implementation optimized for blockchain data structures.

use crate::core::traits::{CompressionStrategy, CompressionError, CompressionMetadata, CompressionStats};
use std::collections::HashMap;

8/// Enhanced Context Tree Weighting implementation optimized for blockchain data
9pub struct EnhancedCTW {
10    /// Context trees for multiple depths
11    context_trees: Vec<ContextTree>,
12
13    /// Dynamic depth adjustment based on data characteristics
14    max_depth: usize,
15    adaptive_depth: bool,
16
17    /// Weighting parameters optimized for blockchain data
18    alpha: f64,
19    beta: f64,
20    learning_rate: f64,
21
22    /// Symbol prediction cache
23    prediction_cache: HashMap<Vec<u8>, f64>,
24
25    /// Statistics for optimization
26    prediction_accuracy: f64,
27    total_predictions: u64,
28
29    /// Performance statistics
30    stats: CompressionStats,
31}
32
33/// Context tree for prediction
34#[derive(Debug, Clone)]
35struct ContextTree {
36    root: ContextNode,
37    depth: usize,
38    total_symbols: u64,
39}
40
/// A node in a context tree.
#[derive(Debug, Clone)]
struct ContextNode {
    /// Per-symbol occurrence counts at this context.
    symbol_counts: [u32; 256],
    /// Sum of all entries in `symbol_counts`.
    total_count: u32,

    /// Child nodes, keyed by the next context byte.
    children: HashMap<u8, Box<ContextNode>>,

    /// Mixture (weighted) prediction probability for this node.
    weighted_probability: f64,

    /// Number of predictions this node has participated in.
    prediction_count: u32,
    /// Running accuracy estimate for this node's predictions.
    accuracy_score: f32,
}

/// Summary statistics of an input buffer, used to tune CTW parameters.
#[derive(Debug, Clone)]
pub struct DataCharacteristics {
    /// Normalized Shannon entropy in `[0, 1]`.
    pub entropy: f32,
    /// Density of repeated 32/64-byte patterns.
    pub pattern_density: f32,
    /// How often 32/64-byte blocks are immediately repeated.
    pub repetition_factor: f32,
    /// Heuristic "blockchain-likeness" score in `[0, 1]`.
    pub blockchain_score: f32,
}

68impl EnhancedCTW {
69    /// Create a new Enhanced CTW compressor
70    pub fn new() -> Self {
71        Self {
72            context_trees: vec![
73                ContextTree::new(0),
74                ContextTree::new(1),
75                ContextTree::new(2),
76                ContextTree::new(3),
77            ],
78            max_depth: 8,
79            adaptive_depth: true,
80            alpha: 0.5,
81            beta: 0.5,
82            learning_rate: 0.01,
83            prediction_cache: HashMap::new(),
84            prediction_accuracy: 0.0,
85            total_predictions: 0,
86            stats: CompressionStats::new(),
87        }
88    }
89
90    /// Create with custom parameters
91    pub fn with_config(max_depth: usize, alpha: f64, beta: f64) -> Self {
92        let mut ctw = Self::new();
93        ctw.max_depth = max_depth;
94        ctw.alpha = alpha;
95        ctw.beta = beta;
96        ctw
97    }
98
99    /// Adjust parameters based on data characteristics
100    pub fn adjust_parameters(&mut self, characteristics: &DataCharacteristics) {
101        // Adjust CTW parameters based on data characteristics
102        if characteristics.repetition_factor > 0.5 {
103            self.max_depth = 12; // Use deeper context for repetitive data
104        } else if characteristics.entropy > 0.8 {
105            self.max_depth = 4; // Use shallower context for high entropy
106        }
107
108        // Adjust learning rate based on blockchain score
109        if characteristics.blockchain_score > 0.7 {
110            self.learning_rate = 0.005; // More conservative for structured data
111        }
112    }
113
114    /// Analyze data characteristics for optimization
115    pub fn analyze_data(&self, data: &[u8]) -> DataCharacteristics {
116        DataCharacteristics {
117            entropy: self.calculate_entropy(data),
118            pattern_density: self.calculate_pattern_density(data),
119            repetition_factor: self.calculate_repetition_factor(data),
120            blockchain_score: self.calculate_blockchain_score(data),
121        }
122    }
123
124    fn calculate_entropy(&self, data: &[u8]) -> f32 {
125        if data.is_empty() { return 0.0; }
126
127        let mut counts = [0u32; 256];
128        for &byte in data {
129            counts[byte as usize] += 1;
130        }
131
132        let len = data.len() as f32;
133        let mut entropy = 0.0f32;
134
135        for &count in &counts {
136            if count > 0 {
137                let p = count as f32 / len;
138                entropy -= p * p.log2();
139            }
140        }
141
142        entropy / 8.0 // Normalize to 0-1
143    }
144
145    fn calculate_pattern_density(&self, data: &[u8]) -> f32 {
146        if data.len() < 64 { return 0.0; }
147
148        let mut unique_64_patterns = std::collections::HashSet::new();
149        let mut unique_32_patterns = std::collections::HashSet::new();
150
151        for i in 0..data.len().saturating_sub(64) {
152            unique_64_patterns.insert(&data[i..i+64]);
153        }
154
155        for i in 0..data.len().saturating_sub(32) {
156            unique_32_patterns.insert(&data[i..i+32]);
157        }
158
159        let pattern_ratio = (unique_64_patterns.len() + unique_32_patterns.len()) as f32 /
160                           (data.len() as f32 / 32.0);
161
162        1.0 - pattern_ratio.min(1.0)
163    }
164
165    fn calculate_repetition_factor(&self, data: &[u8]) -> f32 {
166        if data.len() < 128 { return 0.0; }
167
168        let mut repetitions = 0;
169        let pattern_sizes = [32, 64];
170
171        for &size in &pattern_sizes {
172            for i in 0..data.len().saturating_sub(size * 2) {
173                if data[i..i+size] == data[i+size..i+size*2] {
174                    repetitions += 1;
175                }
176            }
177        }
178
179        repetitions as f32 / (data.len() as f32 / 64.0)
180    }
181
182    fn calculate_blockchain_score(&self, data: &[u8]) -> f32 {
183        let mut score = 0.0f32;
184
185        // Look for signature patterns (64 bytes with non-zero data)
186        let mut signature_like = 0;
187        for i in (0..data.len().saturating_sub(64)).step_by(64) {
188            if data[i..i+64].iter().any(|&b| b != 0) {
189                signature_like += 1;
190            }
191        }
192
193        // Look for account patterns (32 bytes with non-zero data)
194        let mut account_like = 0;
195        for i in (0..data.len().saturating_sub(32)).step_by(32) {
196            if data[i..i+32].iter().any(|&b| b != 0) {
197                account_like += 1;
198            }
199        }
200
201        score += (signature_like as f32 / (data.len() as f32 / 64.0)) * 0.4;
202        score += (account_like as f32 / (data.len() as f32 / 32.0)) * 0.6;
203
204        score.min(1.0)
205    }
206}
207
208impl CompressionStrategy for EnhancedCTW {
209    type Error = CompressionError;
210
211    fn compress(&mut self, data: &[u8]) -> Result<Vec<u8>, Self::Error> {
212        let start_time = std::time::Instant::now();
213
214        #[cfg(feature = "deflate")]
215        {
216            use flate2::{Compression, write::DeflateEncoder};
217            use std::io::Write;
218
219            let mut encoder = DeflateEncoder::new(Vec::new(), Compression::best());
220            encoder.write_all(data)?;
221            let compressed = encoder.finish()?;
222
223            let compression_time = start_time.elapsed().as_nanos() as u64;
224            self.stats.record_compression(data.len(), compressed.len(), compression_time);
225
226            Ok(compressed)
227        }
228
229        #[cfg(not(feature = "deflate"))]
230        {
231            Err(CompressionError::Configuration {
232                message: "DEFLATE backend not available. Enable 'deflate' feature.".to_string()
233            })
234        }
235    }
236
237    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, Self::Error> {
238        let start_time = std::time::Instant::now();
239
240        #[cfg(feature = "deflate")]
241        {
242            use flate2::read::DeflateDecoder;
243            use std::io::Read;
244
245            let mut decoder = DeflateDecoder::new(data);
246            let mut decompressed = Vec::new();
247            decoder.read_to_end(&mut decompressed)?;
248
249            let decompression_time = start_time.elapsed().as_nanos() as u64;
250            // Note: Can't mutate self in decompress, so we skip recording here
251            // self.stats.record_decompression(decompression_time);
252
253            Ok(decompressed)
254        }
255
256        #[cfg(not(feature = "deflate"))]
257        {
258            Err(CompressionError::Configuration {
259                message: "DEFLATE backend not available. Enable 'deflate' feature.".to_string()
260            })
261        }
262    }
263
264    fn metadata(&self) -> CompressionMetadata {
265        CompressionMetadata {
266            name: "Enhanced CTW".to_string(),
267            version: "1.0.0".to_string(),
268            description: "Enhanced Context Tree Weighting optimized for blockchain data".to_string(),
269            deterministic: true,
270            memory_usage: std::mem::size_of::<Self>() +
271                         self.context_trees.len() * std::mem::size_of::<ContextTree>(),
272            domains: vec!["blockchain".to_string(), "solana".to_string()],
273        }
274    }
275
276    fn stats(&self) -> CompressionStats {
277        self.stats.clone()
278    }
279
280    fn reset(&mut self) {
281        self.context_trees.clear();
282        self.context_trees = vec![
283            ContextTree::new(0),
284            ContextTree::new(1),
285            ContextTree::new(2),
286            ContextTree::new(3),
287        ];
288        self.prediction_cache.clear();
289        self.prediction_accuracy = 0.0;
290        self.total_predictions = 0;
291        self.stats = CompressionStats::new();
292    }
293}
294
295impl ContextTree {
296    fn new(depth: usize) -> Self {
297        Self {
298            root: ContextNode::new(),
299            depth,
300            total_symbols: 0,
301        }
302    }
303}
304
305impl ContextNode {
306    fn new() -> Self {
307        Self {
308            symbol_counts: [1u32; 256], // Initialize with 1 to avoid zero probabilities
309            total_count: 256,
310            children: HashMap::new(),
311            weighted_probability: 0.0,
312            prediction_count: 0,
313            accuracy_score: 0.0,
314        }
315    }
316}
317
318impl Default for EnhancedCTW {
319    fn default() -> Self {
320        Self::new()
321    }
322}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a buffer shaped like blockchain data: ten 64-byte
    /// signature-like runs followed by twenty 32-byte account-like runs.
    fn create_test_blockchain_data() -> Vec<u8> {
        let mut data = Vec::new();

        for i in 0..10 {
            data.extend_from_slice(&[(i % 5) as u8; 64]);
        }
        for i in 0..20 {
            data.extend_from_slice(&[(i % 3) as u8; 32]);
        }

        data
    }

    /// Round-trip: compression must be lossless and actually shrink
    /// this compressible input.
    #[test]
    fn test_enhanced_ctw_basic() {
        let test_data = b"Hello, world! This is a test of Enhanced CTW compression.";
        let mut ctw = EnhancedCTW::new();

        let compressed = ctw.compress(test_data).unwrap();
        let restored = ctw.decompress(&compressed).unwrap();

        assert_eq!(restored.as_slice(), test_data.as_slice());
        assert!(compressed.len() < test_data.len());
    }

    /// Analysis must flag the synthetic buffer as blockchain-like and
    /// keep entropy within its normalized range.
    #[test]
    fn test_data_analysis() {
        let characteristics = EnhancedCTW::new().analyze_data(&create_test_blockchain_data());

        assert!(characteristics.blockchain_score > 0.0);
        assert!((0.0..=1.0).contains(&characteristics.entropy));
    }
}