//! codex_memory/memory/compression.rs

1use crate::memory::error::{MemoryError, Result};
2use serde_json::Value;
3use tracing::{debug, warn};
4
/// Compression engine for the frozen memory tier using zstd.
/// Optimized for a 5:1 compression ratio target with fast decompression.
pub struct ZstdCompressionEngine {
    // zstd compression level; constructors keep this within 1-22.
    compression_level: i32,
}
10
11impl ZstdCompressionEngine {
12    /// Create a new compression engine with optimal settings for memory data
13    pub fn new() -> Self {
14        Self {
15            // Level 3 provides good balance between compression ratio and speed
16            // Targeting 5:1 compression ratio as specified
17            compression_level: 3,
18        }
19    }
20
21    /// Create compression engine with custom level
22    pub fn with_level(level: i32) -> Self {
23        Self {
24            compression_level: level.clamp(1, 22), // zstd supports levels 1-22
25        }
26    }
27
28    /// Compress memory content and metadata into a single compressed blob
29    /// Returns compressed bytes and compression metrics
30    pub fn compress_memory_data(
31        &self,
32        content: &str,
33        metadata: &Value,
34    ) -> Result<CompressionResult> {
35        debug!(
36            "Compressing memory data: content_length={}, compression_level={}",
37            content.len(),
38            self.compression_level
39        );
40
41        // Serialize the combined data structure
42        let memory_data = MemoryData {
43            content: content.to_string(),
44            metadata: metadata.clone(),
45            compressed_at: chrono::Utc::now(),
46            original_size: content.len() as u64,
47        };
48
49        let serialized =
50            serde_json::to_vec(&memory_data).map_err(|e| MemoryError::SerializationError {
51                message: format!("Failed to serialize memory data: {e}"),
52            })?;
53
54        // Compress using zstd
55        let compressed =
56            zstd::encode_all(std::io::Cursor::new(&serialized), self.compression_level).map_err(
57                |e| MemoryError::CompressionError {
58                    message: format!("zstd compression failed: {e}"),
59                },
60            )?;
61
62        let original_size = serialized.len() as u64;
63        let compressed_size = compressed.len() as u64;
64        let compression_ratio = original_size as f64 / compressed_size as f64;
65
66        debug!(
67            "Compression completed: original={}B, compressed={}B, ratio={:.2}:1",
68            original_size, compressed_size, compression_ratio
69        );
70
71        // Warn if compression ratio is below target
72        if compression_ratio < 5.0 {
73            warn!(
74                "Compression ratio {:.2}:1 is below target 5:1 for content length {}",
75                compression_ratio,
76                content.len()
77            );
78        }
79
80        Ok(CompressionResult {
81            compressed_data: compressed,
82            original_size,
83            compressed_size,
84            compression_ratio,
85        })
86    }
87
88    /// Decompress memory data back to original content and metadata
89    /// Includes integrity validation
90    pub fn decompress_memory_data(&self, compressed_data: &[u8]) -> Result<MemoryData> {
91        debug!(
92            "Decompressing memory data: compressed_size={}B",
93            compressed_data.len()
94        );
95
96        // Decompress using zstd
97        let decompressed =
98            zstd::decode_all(std::io::Cursor::new(compressed_data)).map_err(|e| {
99                MemoryError::DecompressionError {
100                    message: format!("zstd decompression failed: {e}"),
101                }
102            })?;
103
104        // Deserialize the memory data
105        let memory_data: MemoryData =
106            serde_json::from_slice(&decompressed).map_err(|e| MemoryError::SerializationError {
107                message: format!("Failed to deserialize memory data: {e}"),
108            })?;
109
110        // Validate integrity
111        if memory_data.content.len() != memory_data.original_size as usize {
112            return Err(MemoryError::IntegrityError {
113                message: format!(
114                    "Content size mismatch: expected {}, got {}",
115                    memory_data.original_size,
116                    memory_data.content.len()
117                ),
118            });
119        }
120
121        debug!(
122            "Decompression completed: content_length={}B",
123            memory_data.content.len()
124        );
125
126        Ok(memory_data)
127    }
128
129    /// Batch compress multiple memories for efficient processing
130    pub fn batch_compress(&self, memories: Vec<(&str, &Value)>) -> Result<Vec<CompressionResult>> {
131        debug!("Starting batch compression of {} memories", memories.len());
132
133        let mut results = Vec::with_capacity(memories.len());
134        let mut total_original = 0u64;
135        let mut total_compressed = 0u64;
136
137        for (content, metadata) in memories {
138            match self.compress_memory_data(content, metadata) {
139                Ok(result) => {
140                    total_original += result.original_size;
141                    total_compressed += result.compressed_size;
142                    results.push(result);
143                }
144                Err(e) => {
145                    warn!("Failed to compress memory in batch: {}", e);
146                    return Err(e);
147                }
148            }
149        }
150
151        let overall_ratio = total_original as f64 / total_compressed as f64;
152        debug!(
153            "Batch compression completed: {} memories, overall ratio {:.2}:1",
154            results.len(),
155            overall_ratio
156        );
157
158        Ok(results)
159    }
160
161    /// Estimate compression ratio for planning purposes
162    pub fn estimate_compression_ratio(&self, content: &str) -> f64 {
163        // Quick heuristic based on content characteristics
164        let content_len = content.len() as f64;
165
166        // Base ratio estimates for different content types
167        let estimated_ratio = if content_len < 100.0 {
168            // Very short content compresses poorly
169            2.0
170        } else if content.chars().all(|c| c.is_ascii() && !c.is_control()) {
171            // Text content typically compresses well
172            if content.contains("  ") || content.contains("\n\n") {
173                // Whitespace-heavy content compresses very well
174                7.0
175            } else {
176                5.5
177            }
178        } else {
179            // Mixed content
180            4.0
181        };
182
183        // Adjust based on content length (longer content often compresses better)
184        let length_factor = (content_len / 1000.0).min(2.0).max(0.5);
185        estimated_ratio * length_factor
186    }
187}
188
189impl Default for ZstdCompressionEngine {
190    fn default() -> Self {
191        Self::new()
192    }
193}
194
/// Combined memory data structure for compression.
///
/// This envelope is what actually gets JSON-serialized and zstd-compressed.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct MemoryData {
    // The raw memory content being archived.
    pub content: String,
    // Arbitrary JSON metadata attached to the memory.
    pub metadata: Value,
    // UTC timestamp captured at compression time.
    pub compressed_at: chrono::DateTime<chrono::Utc>,
    // Byte length of `content` at compression time; checked on decompression.
    pub original_size: u64,
}
203
/// Result of a compression operation with metrics.
#[derive(Debug, Clone)]
pub struct CompressionResult {
    // The zstd-compressed envelope bytes.
    pub compressed_data: Vec<u8>,
    // Size of the serialized envelope before compression (bytes) — note this
    // is the JSON envelope size, not just the content length.
    pub original_size: u64,
    // Size of the compressed blob (bytes).
    pub compressed_size: u64,
    // original_size / compressed_size; higher is better.
    pub compression_ratio: f64,
}
212
/// Compression statistics for monitoring.
///
/// Running totals accumulated via `add_compression`; derived metrics
/// (average ratio, efficiency) are recomputed on every update.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CompressionStats {
    // Number of results folded in so far.
    pub total_memories_compressed: u64,
    // Sum of pre-compression sizes (bytes).
    pub total_original_bytes: u64,
    // Sum of post-compression sizes (bytes).
    pub total_compressed_bytes: u64,
    // total_original_bytes / total_compressed_bytes; 0.0 before any data.
    pub average_compression_ratio: f64,
    // total_original_bytes - total_compressed_bytes.
    pub total_space_saved_bytes: u64,
    // Percent of original bytes eliminated; 0.0 before any data.
    pub compression_efficiency_percent: f64,
}
223
224impl CompressionStats {
225    pub fn new() -> Self {
226        Self {
227            total_memories_compressed: 0,
228            total_original_bytes: 0,
229            total_compressed_bytes: 0,
230            average_compression_ratio: 0.0,
231            total_space_saved_bytes: 0,
232            compression_efficiency_percent: 0.0,
233        }
234    }
235
236    pub fn add_compression(&mut self, result: &CompressionResult) {
237        self.total_memories_compressed += 1;
238        self.total_original_bytes += result.original_size;
239        self.total_compressed_bytes += result.compressed_size;
240        self.total_space_saved_bytes = self.total_original_bytes - self.total_compressed_bytes;
241
242        self.average_compression_ratio = if self.total_compressed_bytes > 0 {
243            self.total_original_bytes as f64 / self.total_compressed_bytes as f64
244        } else {
245            0.0
246        };
247
248        self.compression_efficiency_percent = if self.total_original_bytes > 0 {
249            (self.total_space_saved_bytes as f64 / self.total_original_bytes as f64) * 100.0
250        } else {
251            0.0
252        };
253    }
254}
255
256impl Default for CompressionStats {
257    fn default() -> Self {
258        Self::new()
259    }
260}
261
/// Frozen memory compression utilities.
///
/// Stateless namespace: all functionality lives in associated functions.
pub struct FrozenMemoryCompression;
264
265impl FrozenMemoryCompression {
266    /// Convert compression result to database-ready format
267    pub fn to_database_format(result: CompressionResult) -> (Vec<u8>, i32, i32, f64) {
268        (
269            result.compressed_data,
270            result.original_size as i32,
271            result.compressed_size as i32,
272            result.compression_ratio,
273        )
274    }
275
276    /// Validate compression meets frozen tier requirements
277    pub fn validate_compression_quality(ratio: f64, content_length: usize) -> Result<()> {
278        const MIN_COMPRESSION_RATIO: f64 = 2.0; // Absolute minimum
279        const TARGET_COMPRESSION_RATIO: f64 = 5.0; // Target ratio
280        const MIN_CONTENT_LENGTH: usize = 50; // Don't compress very short content
281
282        if content_length < MIN_CONTENT_LENGTH {
283            return Err(MemoryError::CompressionError {
284                message: format!(
285                    "Content too short for compression: {content_length} bytes (minimum: {MIN_CONTENT_LENGTH})"
286                ),
287            });
288        }
289
290        if ratio < MIN_COMPRESSION_RATIO {
291            return Err(MemoryError::CompressionError {
292                message: format!(
293                    "Compression ratio {ratio:.2}:1 is below minimum {MIN_COMPRESSION_RATIO:.1}:1"
294                ),
295            });
296        }
297
298        if ratio < TARGET_COMPRESSION_RATIO {
299            warn!(
300                "Compression ratio {:.2}:1 is below target {:.1}:1",
301                ratio, TARGET_COMPRESSION_RATIO
302            );
303        }
304
305        Ok(())
306    }
307
308    /// Calculate storage savings from compression
309    pub fn calculate_storage_savings(original_size: u64, compressed_size: u64) -> StorageSavings {
310        let space_saved = original_size.saturating_sub(compressed_size);
311        let compression_ratio = if compressed_size > 0 {
312            original_size as f64 / compressed_size as f64
313        } else {
314            0.0
315        };
316        let efficiency_percent = if original_size > 0 {
317            (space_saved as f64 / original_size as f64) * 100.0
318        } else {
319            0.0
320        };
321
322        StorageSavings {
323            original_size,
324            compressed_size,
325            space_saved,
326            compression_ratio,
327            efficiency_percent,
328        }
329    }
330}
331
/// Breakdown of the storage savings from a single compression.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct StorageSavings {
    // Uncompressed size (bytes).
    pub original_size: u64,
    // Compressed size (bytes).
    pub compressed_size: u64,
    // original_size - compressed_size, floored at zero.
    pub space_saved: u64,
    // original_size / compressed_size; 0.0 when compressed_size is 0.
    pub compression_ratio: f64,
    // Percent of original bytes eliminated by compression.
    pub efficiency_percent: f64,
}
340
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // Compress then decompress and verify content, metadata, and recorded
    // size all survive the round trip.
    #[test]
    fn test_compression_decompression_roundtrip() {
        let engine = ZstdCompressionEngine::new();
        let content = "This is a test memory content that should compress well because it has repetitive patterns and common English words.".repeat(10);
        let metadata = json!({
            "tag": "test",
            "importance": 0.8,
            "created_at": "2024-01-01T00:00:00Z"
        });

        // Compress
        let result = engine.compress_memory_data(&content, &metadata).unwrap();

        // Should achieve good compression ratio
        assert!(result.compression_ratio > 3.0);
        assert!(result.compressed_size < result.original_size);

        // Decompress
        let decompressed = engine
            .decompress_memory_data(&result.compressed_data)
            .unwrap();

        // Verify data integrity
        assert_eq!(decompressed.content, content);
        assert_eq!(decompressed.metadata, metadata);
        assert_eq!(decompressed.original_size, content.len() as u64);
    }

    // Sanity-check the heuristic estimator at both ends of the scale.
    #[test]
    fn test_compression_ratio_estimation() {
        let engine = ZstdCompressionEngine::new();

        // Long repetitive content should have high estimated ratio
        let repetitive_content = "Hello world! ".repeat(100);
        let ratio = engine.estimate_compression_ratio(&repetitive_content);
        assert!(ratio > 5.0);

        // Short content should have low estimated ratio
        let short_content = "Hi";
        let ratio = engine.estimate_compression_ratio(short_content);
        assert!(ratio < 3.0);
    }

    // Batch API should return one result per input, in order.
    #[test]
    fn test_batch_compression() {
        let engine = ZstdCompressionEngine::new();
        let metadata = json!({"test": true});

        let memories = vec![
            ("First memory content", &metadata),
            ("Second memory content with different text", &metadata),
            (
                "Third memory content for testing batch processing",
                &metadata,
            ),
        ];

        let results = engine.batch_compress(memories).unwrap();

        assert_eq!(results.len(), 3);
        for result in results {
            assert!(result.compression_ratio > 1.0);
            assert!(result.compressed_size > 0);
        }
    }

    // Quality gate: minimum ratio and minimum content length are enforced.
    #[test]
    fn test_compression_validation() {
        // Valid compression should pass
        assert!(FrozenMemoryCompression::validate_compression_quality(5.0, 1000).is_ok());

        // Below minimum ratio should fail
        assert!(FrozenMemoryCompression::validate_compression_quality(1.5, 1000).is_err());

        // Too short content should fail
        assert!(FrozenMemoryCompression::validate_compression_quality(5.0, 10).is_err());
    }

    // 1000 -> 200 bytes is exactly 5:1 and 80% efficiency.
    #[test]
    fn test_storage_savings_calculation() {
        let savings = FrozenMemoryCompression::calculate_storage_savings(1000, 200);

        assert_eq!(savings.original_size, 1000);
        assert_eq!(savings.compressed_size, 200);
        assert_eq!(savings.space_saved, 800);
        assert_eq!(savings.compression_ratio, 5.0);
        assert_eq!(savings.efficiency_percent, 80.0);
    }

    // A single folded-in result should populate all derived metrics.
    #[test]
    fn test_compression_stats_tracking() {
        let mut stats = CompressionStats::new();

        let result = CompressionResult {
            compressed_data: vec![1, 2, 3],
            original_size: 1000,
            compressed_size: 200,
            compression_ratio: 5.0,
        };

        stats.add_compression(&result);

        assert_eq!(stats.total_memories_compressed, 1);
        assert_eq!(stats.total_original_bytes, 1000);
        assert_eq!(stats.total_compressed_bytes, 200);
        assert_eq!(stats.average_compression_ratio, 5.0);
        assert_eq!(stats.total_space_saved_bytes, 800);
        assert_eq!(stats.compression_efficiency_percent, 80.0);
    }
}
455}