// blockchain_compression/algorithms/enhanced_ctw.rs
use crate::core::traits::{CompressionStrategy, CompressionError, CompressionMetadata, CompressionStats};
use std::collections::HashMap;

/// Context Tree Weighting compressor tuned for blockchain payloads.
///
/// Holds several context trees of increasing order plus weighting
/// parameters. The byte stream itself is currently produced by the
/// DEFLATE backend (see the `CompressionStrategy` impl in this file).
pub struct EnhancedCTW {
    // One tree per context order; `new()` creates orders 0..=3.
    context_trees: Vec<ContextTree>,

    // Maximum context depth; tuned by `adjust_parameters`.
    max_depth: usize,
    // When true, `adjust_parameters` may change `max_depth` per input.
    adaptive_depth: bool,

    // Mixing weights, settable via `with_config`.
    // NOTE(review): not read anywhere in this file — presumably consumed
    // by model code elsewhere; confirm before removing.
    alpha: f64,
    beta: f64,
    // Step size; lowered for blockchain-like data in `adjust_parameters`.
    learning_rate: f64,

    // Memoized predictions keyed by context bytes.
    prediction_cache: HashMap<Vec<u8>, f64>,

    // Running accuracy bookkeeping (reset by `reset()`).
    prediction_accuracy: f64,
    total_predictions: u64,

    // Aggregate compression statistics reported by `stats()`.
    stats: CompressionStats,
}
32
/// A single context tree of a fixed order.
#[derive(Debug, Clone)]
struct ContextTree {
    root: ContextNode,
    // Context order/depth this tree models (set at construction).
    depth: usize,
    // Number of symbols observed so far (starts at 0).
    total_symbols: u64,
}
40
/// One node in a context tree: per-symbol counts plus mixing state.
#[derive(Debug, Clone)]
struct ContextNode {
    // Occurrence count per byte value; initialised to 1 each in `new()`
    // (Laplace smoothing), so `total_count` starts at 256.
    symbol_counts: [u32; 256],
    total_count: u32,

    // Child node per next context byte, allocated lazily.
    children: HashMap<u8, Box<ContextNode>>,

    // Mixed probability estimate for this context.
    weighted_probability: f64,

    // Prediction-accuracy bookkeeping for this node.
    prediction_count: u32,
    accuracy_score: f32,
}
58
/// Summary statistics produced by `EnhancedCTW::analyze_data`,
/// consumed by `EnhancedCTW::adjust_parameters`.
#[derive(Debug, Clone)]
pub struct DataCharacteristics {
    /// Shannon entropy of the byte distribution, normalised to [0, 1].
    pub entropy: f32,
    /// Share of repeated 32/64-byte sliding windows (higher = more patterns).
    pub pattern_density: f32,
    /// Density of immediately adjacent repeated 32/64-byte runs.
    pub repetition_factor: f32,
    /// Heuristic score for how blockchain-like the buffer looks.
    pub blockchain_score: f32,
}
67
68impl EnhancedCTW {
69 pub fn new() -> Self {
71 Self {
72 context_trees: vec![
73 ContextTree::new(0),
74 ContextTree::new(1),
75 ContextTree::new(2),
76 ContextTree::new(3),
77 ],
78 max_depth: 8,
79 adaptive_depth: true,
80 alpha: 0.5,
81 beta: 0.5,
82 learning_rate: 0.01,
83 prediction_cache: HashMap::new(),
84 prediction_accuracy: 0.0,
85 total_predictions: 0,
86 stats: CompressionStats::new(),
87 }
88 }
89
90 pub fn with_config(max_depth: usize, alpha: f64, beta: f64) -> Self {
92 let mut ctw = Self::new();
93 ctw.max_depth = max_depth;
94 ctw.alpha = alpha;
95 ctw.beta = beta;
96 ctw
97 }
98
99 pub fn adjust_parameters(&mut self, characteristics: &DataCharacteristics) {
101 if characteristics.repetition_factor > 0.5 {
103 self.max_depth = 12; } else if characteristics.entropy > 0.8 {
105 self.max_depth = 4; }
107
108 if characteristics.blockchain_score > 0.7 {
110 self.learning_rate = 0.005; }
112 }
113
114 pub fn analyze_data(&self, data: &[u8]) -> DataCharacteristics {
116 DataCharacteristics {
117 entropy: self.calculate_entropy(data),
118 pattern_density: self.calculate_pattern_density(data),
119 repetition_factor: self.calculate_repetition_factor(data),
120 blockchain_score: self.calculate_blockchain_score(data),
121 }
122 }
123
124 fn calculate_entropy(&self, data: &[u8]) -> f32 {
125 if data.is_empty() { return 0.0; }
126
127 let mut counts = [0u32; 256];
128 for &byte in data {
129 counts[byte as usize] += 1;
130 }
131
132 let len = data.len() as f32;
133 let mut entropy = 0.0f32;
134
135 for &count in &counts {
136 if count > 0 {
137 let p = count as f32 / len;
138 entropy -= p * p.log2();
139 }
140 }
141
142 entropy / 8.0 }
144
145 fn calculate_pattern_density(&self, data: &[u8]) -> f32 {
146 if data.len() < 64 { return 0.0; }
147
148 let mut unique_64_patterns = std::collections::HashSet::new();
149 let mut unique_32_patterns = std::collections::HashSet::new();
150
151 for i in 0..data.len().saturating_sub(64) {
152 unique_64_patterns.insert(&data[i..i+64]);
153 }
154
155 for i in 0..data.len().saturating_sub(32) {
156 unique_32_patterns.insert(&data[i..i+32]);
157 }
158
159 let pattern_ratio = (unique_64_patterns.len() + unique_32_patterns.len()) as f32 /
160 (data.len() as f32 / 32.0);
161
162 1.0 - pattern_ratio.min(1.0)
163 }
164
165 fn calculate_repetition_factor(&self, data: &[u8]) -> f32 {
166 if data.len() < 128 { return 0.0; }
167
168 let mut repetitions = 0;
169 let pattern_sizes = [32, 64];
170
171 for &size in &pattern_sizes {
172 for i in 0..data.len().saturating_sub(size * 2) {
173 if data[i..i+size] == data[i+size..i+size*2] {
174 repetitions += 1;
175 }
176 }
177 }
178
179 repetitions as f32 / (data.len() as f32 / 64.0)
180 }
181
182 fn calculate_blockchain_score(&self, data: &[u8]) -> f32 {
183 let mut score = 0.0f32;
184
185 let mut signature_like = 0;
187 for i in (0..data.len().saturating_sub(64)).step_by(64) {
188 if data[i..i+64].iter().any(|&b| b != 0) {
189 signature_like += 1;
190 }
191 }
192
193 let mut account_like = 0;
195 for i in (0..data.len().saturating_sub(32)).step_by(32) {
196 if data[i..i+32].iter().any(|&b| b != 0) {
197 account_like += 1;
198 }
199 }
200
201 score += (signature_like as f32 / (data.len() as f32 / 64.0)) * 0.4;
202 score += (account_like as f32 / (data.len() as f32 / 32.0)) * 0.6;
203
204 score.min(1.0)
205 }
206}
207
208impl CompressionStrategy for EnhancedCTW {
209 type Error = CompressionError;
210
211 fn compress(&mut self, data: &[u8]) -> Result<Vec<u8>, Self::Error> {
212 let start_time = std::time::Instant::now();
213
214 #[cfg(feature = "deflate")]
215 {
216 use flate2::{Compression, write::DeflateEncoder};
217 use std::io::Write;
218
219 let mut encoder = DeflateEncoder::new(Vec::new(), Compression::best());
220 encoder.write_all(data)?;
221 let compressed = encoder.finish()?;
222
223 let compression_time = start_time.elapsed().as_nanos() as u64;
224 self.stats.record_compression(data.len(), compressed.len(), compression_time);
225
226 Ok(compressed)
227 }
228
229 #[cfg(not(feature = "deflate"))]
230 {
231 Err(CompressionError::Configuration {
232 message: "DEFLATE backend not available. Enable 'deflate' feature.".to_string()
233 })
234 }
235 }
236
237 fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, Self::Error> {
238 let start_time = std::time::Instant::now();
239
240 #[cfg(feature = "deflate")]
241 {
242 use flate2::read::DeflateDecoder;
243 use std::io::Read;
244
245 let mut decoder = DeflateDecoder::new(data);
246 let mut decompressed = Vec::new();
247 decoder.read_to_end(&mut decompressed)?;
248
249 let decompression_time = start_time.elapsed().as_nanos() as u64;
250 Ok(decompressed)
254 }
255
256 #[cfg(not(feature = "deflate"))]
257 {
258 Err(CompressionError::Configuration {
259 message: "DEFLATE backend not available. Enable 'deflate' feature.".to_string()
260 })
261 }
262 }
263
264 fn metadata(&self) -> CompressionMetadata {
265 CompressionMetadata {
266 name: "Enhanced CTW".to_string(),
267 version: "1.0.0".to_string(),
268 description: "Enhanced Context Tree Weighting optimized for blockchain data".to_string(),
269 deterministic: true,
270 memory_usage: std::mem::size_of::<Self>() +
271 self.context_trees.len() * std::mem::size_of::<ContextTree>(),
272 domains: vec!["blockchain".to_string(), "solana".to_string()],
273 }
274 }
275
276 fn stats(&self) -> CompressionStats {
277 self.stats.clone()
278 }
279
280 fn reset(&mut self) {
281 self.context_trees.clear();
282 self.context_trees = vec![
283 ContextTree::new(0),
284 ContextTree::new(1),
285 ContextTree::new(2),
286 ContextTree::new(3),
287 ];
288 self.prediction_cache.clear();
289 self.prediction_accuracy = 0.0;
290 self.total_predictions = 0;
291 self.stats = CompressionStats::new();
292 }
293}
294
295impl ContextTree {
296 fn new(depth: usize) -> Self {
297 Self {
298 root: ContextNode::new(),
299 depth,
300 total_symbols: 0,
301 }
302 }
303}
304
305impl ContextNode {
306 fn new() -> Self {
307 Self {
308 symbol_counts: [1u32; 256], total_count: 256,
310 children: HashMap::new(),
311 weighted_probability: 0.0,
312 prediction_count: 0,
313 accuracy_score: 0.0,
314 }
315 }
316}
317
318impl Default for EnhancedCTW {
319 fn default() -> Self {
320 Self::new()
321 }
322}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// Synthetic payload shaped like blockchain records: ten 64-byte
    /// "signature" runs followed by twenty 32-byte "account" runs.
    fn create_test_blockchain_data() -> Vec<u8> {
        let mut data = Vec::new();
        for i in 0..10u8 {
            data.extend_from_slice(&[i % 5; 64]);
        }
        for i in 0..20u8 {
            data.extend_from_slice(&[i % 3; 32]);
        }
        data
    }

    #[test]
    fn test_enhanced_ctw_basic() {
        let mut ctw = EnhancedCTW::new();
        let test_data = b"Hello, world! This is a test of Enhanced CTW compression.";

        let compressed = ctw.compress(test_data).unwrap();
        let decompressed = ctw.decompress(&compressed).unwrap();

        // Round-trip must be lossless, and the repetitive text should shrink.
        assert_eq!(test_data.as_slice(), decompressed.as_slice());
        assert!(compressed.len() < test_data.len());
    }

    #[test]
    fn test_data_analysis() {
        let ctw = EnhancedCTW::new();
        let blockchain_data = create_test_blockchain_data();

        let characteristics = ctw.analyze_data(&blockchain_data);

        assert!(characteristics.blockchain_score > 0.0);
        assert!((0.0..=1.0).contains(&characteristics.entropy));
    }
}