// torsh_package/compression.rs

1//! Advanced compression algorithms for packages
2//!
3//! This module provides multiple compression algorithms optimized for different
4//! types of data commonly found in ML packages including models, source code,
5//! and configuration files.
6
7use std::collections::HashMap;
8use std::io::{Read, Write};
9
10use serde::{Deserialize, Serialize};
11use torsh_core::error::{Result, TorshError};
12
13use crate::resources::{Resource, ResourceType};
14
/// Supported compression algorithms
///
/// Selected per resource type via [`CompressionConfig`]. Note that `Brotli`
/// and `Lz4` currently fall back to Gzip inside `AdvancedCompressor` (see the
/// corresponding `compress_*` helpers).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CompressionAlgorithm {
    /// No compression (data stored verbatim)
    None,
    /// Gzip compression (fast, decent compression)
    Gzip,
    /// Zstandard compression (excellent speed/ratio tradeoff)
    Zstd,
    /// LZMA compression (high compression ratio, slower)
    Lzma,
    /// Brotli compression (good for text/JSON)
    Brotli,
    /// LZ4 compression (extremely fast, lower ratio)
    Lz4,
}
31
/// Compression level (0-22 depending on algorithm)
///
/// The raw value is clamped to each codec's valid range by
/// [`CompressionLevel::for_algorithm`] at compression time.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct CompressionLevel(pub u32);
35
/// Compression strategy for different data types
///
/// Mapped to a concrete level by [`CompressionConfig::level_for_strategy`];
/// `Adaptive` additionally inspects the resource being compressed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionStrategy {
    /// Optimize for speed (low level)
    Speed,
    /// Optimize for size (high level)
    Size,
    /// Balanced speed/size
    Balanced,
    /// Adaptive based on data type and size
    Adaptive,
}
48
/// Compression configuration
///
/// Built via [`Default`] or the `with_*` builder methods; consumed by
/// `AdvancedCompressor`.
#[derive(Debug, Clone)]
pub struct CompressionConfig {
    /// Default algorithm, used when no per-type override matches
    pub default_algorithm: CompressionAlgorithm,
    /// Default compression level
    pub default_level: CompressionLevel,
    /// Compression strategy (speed/size tradeoff)
    pub strategy: CompressionStrategy,
    /// Per-resource-type algorithm overrides
    pub algorithm_overrides: HashMap<ResourceType, CompressionAlgorithm>,
    /// Minimum size threshold for compression (bytes); smaller resources are
    /// stored uncompressed
    pub min_size_threshold: usize,
    /// Maximum size for in-memory compression
    // NOTE(review): this limit is not enforced anywhere in this module —
    // confirm callers consume it.
    pub max_memory_size: usize,
    /// Enable parallel compression for large resources
    // NOTE(review): not consulted by `AdvancedCompressor`; parallelism is
    // provided separately by `ParallelCompressor`.
    pub parallel_compression: bool,
}
67
/// Advanced compressor with multiple algorithm support
///
/// Stateless apart from its [`CompressionConfig`]; all compress/decompress
/// operations take `&self`.
pub struct AdvancedCompressor {
    config: CompressionConfig,
}
72
/// Compression result with metadata
#[derive(Debug, Clone)]
pub struct CompressionResult {
    /// Compressed data
    pub data: Vec<u8>,
    /// Algorithm used
    pub algorithm: CompressionAlgorithm,
    /// Compression level that was requested (clamping to the codec's valid
    /// range happens at compression time, not here)
    pub level: CompressionLevel,
    /// Original size in bytes
    pub original_size: usize,
    /// Compressed size in bytes
    pub compressed_size: usize,
    /// Compression ratio (compressed/original); values below 1.0 mean the
    /// data shrank
    pub ratio: f32,
    /// Compression time in milliseconds
    pub compression_time_ms: u64,
}
91
/// Decompression result
#[derive(Debug, Clone)]
pub struct DecompressionResult {
    /// Decompressed (original) data
    pub data: Vec<u8>,
    /// Algorithm that had been used for compression
    pub algorithm: CompressionAlgorithm,
    /// Decompression time in milliseconds
    pub decompression_time_ms: u64,
}
102
103impl Default for CompressionLevel {
104    fn default() -> Self {
105        CompressionLevel(6)
106    }
107}
108
109impl CompressionLevel {
110    /// Create a new compression level
111    pub fn new(level: u32) -> Self {
112        CompressionLevel(level)
113    }
114
115    /// Get level for specific algorithm (clamps to valid range)
116    pub fn for_algorithm(&self, algorithm: CompressionAlgorithm) -> u32 {
117        match algorithm {
118            CompressionAlgorithm::None => 0,
119            CompressionAlgorithm::Gzip => self.0.min(9),
120            CompressionAlgorithm::Zstd => self.0.min(22),
121            CompressionAlgorithm::Lzma => self.0.min(9),
122            CompressionAlgorithm::Brotli => self.0.min(11),
123            CompressionAlgorithm::Lz4 => self.0.min(16),
124        }
125    }
126}
127
128impl Default for CompressionConfig {
129    fn default() -> Self {
130        let mut algorithm_overrides = HashMap::new();
131
132        // Text files compress well with Brotli
133        algorithm_overrides.insert(ResourceType::Source, CompressionAlgorithm::Brotli);
134        algorithm_overrides.insert(ResourceType::Config, CompressionAlgorithm::Brotli);
135        algorithm_overrides.insert(ResourceType::Documentation, CompressionAlgorithm::Brotli);
136        algorithm_overrides.insert(ResourceType::Text, CompressionAlgorithm::Brotli);
137        algorithm_overrides.insert(ResourceType::Metadata, CompressionAlgorithm::Brotli);
138
139        // Binary data works well with Zstandard
140        algorithm_overrides.insert(ResourceType::Model, CompressionAlgorithm::Zstd);
141        algorithm_overrides.insert(ResourceType::Data, CompressionAlgorithm::Zstd);
142        algorithm_overrides.insert(ResourceType::Binary, CompressionAlgorithm::Zstd);
143
144        Self {
145            default_algorithm: CompressionAlgorithm::Zstd,
146            default_level: CompressionLevel::default(),
147            strategy: CompressionStrategy::Balanced,
148            algorithm_overrides,
149            min_size_threshold: 256, // Don't compress files smaller than 256 bytes
150            max_memory_size: 100 * 1024 * 1024, // 100MB
151            parallel_compression: true,
152        }
153    }
154}
155
156impl CompressionConfig {
157    /// Create new compression config
158    pub fn new() -> Self {
159        Self::default()
160    }
161
162    /// Set default algorithm
163    pub fn with_algorithm(mut self, algorithm: CompressionAlgorithm) -> Self {
164        self.default_algorithm = algorithm;
165        self
166    }
167
168    /// Set default compression level
169    pub fn with_level(mut self, level: CompressionLevel) -> Self {
170        self.default_level = level;
171        self
172    }
173
174    /// Set compression strategy
175    pub fn with_strategy(mut self, strategy: CompressionStrategy) -> Self {
176        self.strategy = strategy;
177        self
178    }
179
180    /// Set minimum size threshold
181    pub fn with_min_threshold(mut self, threshold: usize) -> Self {
182        self.min_size_threshold = threshold;
183        self
184    }
185
186    /// Enable/disable parallel compression
187    pub fn with_parallel(mut self, parallel: bool) -> Self {
188        self.parallel_compression = parallel;
189        self
190    }
191
192    /// Get algorithm for resource type
193    pub fn algorithm_for_resource(&self, resource_type: ResourceType) -> CompressionAlgorithm {
194        self.algorithm_overrides
195            .get(&resource_type)
196            .copied()
197            .unwrap_or(self.default_algorithm)
198    }
199
200    /// Get compression level adjusted for strategy
201    pub fn level_for_strategy(&self, strategy: CompressionStrategy) -> CompressionLevel {
202        match strategy {
203            CompressionStrategy::Speed => CompressionLevel(1),
204            CompressionStrategy::Size => CompressionLevel(9),
205            CompressionStrategy::Balanced => CompressionLevel(6),
206            CompressionStrategy::Adaptive => self.default_level,
207        }
208    }
209}
210
211impl AdvancedCompressor {
212    /// Create new compressor with default config
213    pub fn new() -> Self {
214        Self {
215            config: CompressionConfig::default(),
216        }
217    }
218
219    /// Create compressor with custom config
220    pub fn with_config(config: CompressionConfig) -> Self {
221        Self { config }
222    }
223
224    /// Compress a resource
225    pub fn compress_resource(&self, resource: &Resource) -> Result<CompressionResult> {
226        // Check size threshold
227        if resource.data.len() < self.config.min_size_threshold {
228            return Ok(CompressionResult {
229                data: resource.data.clone(),
230                algorithm: CompressionAlgorithm::None,
231                level: CompressionLevel(0),
232                original_size: resource.data.len(),
233                compressed_size: resource.data.len(),
234                ratio: 1.0,
235                compression_time_ms: 0,
236            });
237        }
238
239        // Select algorithm and level
240        let algorithm = self.config.algorithm_for_resource(resource.resource_type);
241        let level = match self.config.strategy {
242            CompressionStrategy::Adaptive => self.adaptive_level(resource),
243            strategy => self.config.level_for_strategy(strategy),
244        };
245
246        self.compress_data(&resource.data, algorithm, level)
247    }
248
249    /// Compress raw data with specific algorithm and level
250    pub fn compress_data(
251        &self,
252        data: &[u8],
253        algorithm: CompressionAlgorithm,
254        level: CompressionLevel,
255    ) -> Result<CompressionResult> {
256        let start_time = std::time::Instant::now();
257
258        let compressed_data = match algorithm {
259            CompressionAlgorithm::None => data.to_vec(),
260            CompressionAlgorithm::Gzip => {
261                self.compress_gzip(data, level.for_algorithm(algorithm))?
262            }
263            CompressionAlgorithm::Zstd => {
264                self.compress_zstd(data, level.for_algorithm(algorithm))?
265            }
266            CompressionAlgorithm::Lzma => {
267                self.compress_lzma(data, level.for_algorithm(algorithm))?
268            }
269            CompressionAlgorithm::Brotli => {
270                self.compress_brotli(data, level.for_algorithm(algorithm))?
271            }
272            CompressionAlgorithm::Lz4 => self.compress_lz4(data, level.for_algorithm(algorithm))?,
273        };
274
275        let compression_time_ms = start_time.elapsed().as_millis() as u64;
276        let ratio = if data.is_empty() {
277            1.0
278        } else {
279            compressed_data.len() as f32 / data.len() as f32
280        };
281
282        let compressed_size = compressed_data.len();
283
284        Ok(CompressionResult {
285            data: compressed_data,
286            algorithm,
287            level,
288            original_size: data.len(),
289            compressed_size,
290            ratio,
291            compression_time_ms,
292        })
293    }
294
295    /// Decompress data
296    pub fn decompress_data(
297        &self,
298        compressed_data: &[u8],
299        algorithm: CompressionAlgorithm,
300    ) -> Result<DecompressionResult> {
301        let start_time = std::time::Instant::now();
302
303        let decompressed_data = match algorithm {
304            CompressionAlgorithm::None => compressed_data.to_vec(),
305            CompressionAlgorithm::Gzip => self.decompress_gzip(compressed_data)?,
306            CompressionAlgorithm::Zstd => self.decompress_zstd(compressed_data)?,
307            CompressionAlgorithm::Lzma => self.decompress_lzma(compressed_data)?,
308            CompressionAlgorithm::Brotli => self.decompress_brotli(compressed_data)?,
309            CompressionAlgorithm::Lz4 => self.decompress_lz4(compressed_data)?,
310        };
311
312        let decompression_time_ms = start_time.elapsed().as_millis() as u64;
313
314        Ok(DecompressionResult {
315            data: decompressed_data,
316            algorithm,
317            decompression_time_ms,
318        })
319    }
320
321    /// Benchmark compression algorithms for given data
322    pub fn benchmark_algorithms(&self, data: &[u8]) -> Result<Vec<CompressionResult>> {
323        let algorithms = [
324            CompressionAlgorithm::Gzip,
325            CompressionAlgorithm::Zstd,
326            CompressionAlgorithm::Lzma,
327            CompressionAlgorithm::Brotli,
328            CompressionAlgorithm::Lz4,
329        ];
330
331        let mut results = Vec::new();
332
333        for algorithm in &algorithms {
334            let result = self.compress_data(data, *algorithm, CompressionLevel(6))?;
335            results.push(result);
336        }
337
338        // Sort by compression ratio (best first)
339        results.sort_by(|a, b| {
340            a.ratio
341                .partial_cmp(&b.ratio)
342                .unwrap_or(std::cmp::Ordering::Equal)
343        });
344
345        Ok(results)
346    }
347
348    /// Adaptively choose compression level based on resource characteristics
349    fn adaptive_level(&self, resource: &Resource) -> CompressionLevel {
350        let data_size = resource.data.len();
351
352        match resource.resource_type {
353            ResourceType::Model | ResourceType::Binary => {
354                // For model files, balance speed and compression based on size
355                if data_size > 10 * 1024 * 1024 {
356                    // > 10MB
357                    CompressionLevel(3) // Fast compression for large files
358                } else if data_size > 1024 * 1024 {
359                    // > 1MB
360                    CompressionLevel(6) // Balanced
361                } else {
362                    CompressionLevel(9) // High compression for smaller files
363                }
364            }
365            ResourceType::Source | ResourceType::Config | ResourceType::Documentation => {
366                // Text files compress well, so we can afford higher levels
367                CompressionLevel(8)
368            }
369            ResourceType::Text | ResourceType::Metadata => {
370                // JSON/text data often compresses very well
371                CompressionLevel(7)
372            }
373            _ => CompressionLevel(6), // Default balanced
374        }
375    }
376
377    /// Compress with Gzip
378    fn compress_gzip(&self, data: &[u8], level: u32) -> Result<Vec<u8>> {
379        use flate2::{write::GzEncoder, Compression};
380
381        let mut encoder = GzEncoder::new(Vec::new(), Compression::new(level));
382        encoder.write_all(data).map_err(|e| {
383            TorshError::SerializationError(format!("Gzip compression failed: {}", e))
384        })?;
385
386        encoder
387            .finish()
388            .map_err(|e| TorshError::SerializationError(format!("Gzip finalization failed: {}", e)))
389    }
390
391    /// Decompress Gzip
392    fn decompress_gzip(&self, data: &[u8]) -> Result<Vec<u8>> {
393        use flate2::read::GzDecoder;
394
395        let mut decoder = GzDecoder::new(data);
396        let mut result = Vec::new();
397        decoder.read_to_end(&mut result).map_err(|e| {
398            TorshError::SerializationError(format!("Gzip decompression failed: {}", e))
399        })?;
400
401        Ok(result)
402    }
403
404    /// Compress with Zstandard
405    fn compress_zstd(&self, data: &[u8], level: u32) -> Result<Vec<u8>> {
406        zstd::encode_all(data, level as i32).map_err(|e| {
407            TorshError::SerializationError(format!("Zstandard compression failed: {}", e))
408        })
409    }
410
411    /// Decompress Zstandard
412    fn decompress_zstd(&self, data: &[u8]) -> Result<Vec<u8>> {
413        zstd::decode_all(data).map_err(|e| {
414            TorshError::SerializationError(format!("Zstandard decompression failed: {}", e))
415        })
416    }
417
418    /// Compress with LZMA
419    fn compress_lzma(&self, data: &[u8], _level: u32) -> Result<Vec<u8>> {
420        let mut output = Vec::new();
421        lzma_rs::lzma_compress(&mut std::io::Cursor::new(data), &mut output).map_err(|e| {
422            TorshError::SerializationError(format!("LZMA compression failed: {}", e))
423        })?;
424
425        Ok(output)
426    }
427
428    /// Decompress LZMA
429    fn decompress_lzma(&self, data: &[u8]) -> Result<Vec<u8>> {
430        let mut output = Vec::new();
431        lzma_rs::lzma_decompress(&mut std::io::Cursor::new(data), &mut output).map_err(|e| {
432            TorshError::SerializationError(format!("LZMA decompression failed: {}", e))
433        })?;
434
435        Ok(output)
436    }
437
438    /// Compress with Brotli
439    fn compress_brotli(&self, data: &[u8], level: u32) -> Result<Vec<u8>> {
440        // For now, fall back to gzip since brotli might not be available
441        // In a real implementation, you would add brotli dependency and use it
442        self.compress_gzip(data, level.min(9))
443    }
444
445    /// Decompress Brotli
446    fn decompress_brotli(&self, data: &[u8]) -> Result<Vec<u8>> {
447        // For now, fall back to gzip since brotli might not be available
448        // In a real implementation, you would add brotli dependency and use it
449        self.decompress_gzip(data)
450    }
451
452    /// Compress with LZ4
453    fn compress_lz4(&self, data: &[u8], _level: u32) -> Result<Vec<u8>> {
454        // For now, fall back to gzip since lz4 might not be available
455        // In a real implementation, you would add lz4 dependency and use it
456        self.compress_gzip(data, 1) // Use fast compression as LZ4 is meant to be fast
457    }
458
459    /// Decompress LZ4
460    fn decompress_lz4(&self, data: &[u8]) -> Result<Vec<u8>> {
461        // For now, fall back to gzip since lz4 might not be available
462        // In a real implementation, you would add lz4 dependency and use it
463        self.decompress_gzip(data)
464    }
465}
466
467impl Default for AdvancedCompressor {
468    fn default() -> Self {
469        Self::new()
470    }
471}
472
/// Parallel compression utilities for large resources
///
/// Splits input into fixed-size chunks, compresses them in parallel, and
/// frames the output as: a little-endian `u64` chunk count, then per chunk a
/// little-endian `u64` length followed by that chunk's compressed bytes
/// (see `compress_parallel`).
pub struct ParallelCompressor {
    // Underlying single-stream compressor applied to each chunk.
    compressor: AdvancedCompressor,
    // Chunk size in bytes (default 1 MB).
    chunk_size: usize,
    // NOTE(review): stored but never read — chunk work runs on the global
    // parallel pool. Confirm whether this should configure pool size.
    num_threads: usize,
}
479
480impl ParallelCompressor {
481    /// Create new parallel compressor
482    pub fn new(compressor: AdvancedCompressor) -> Self {
483        Self {
484            compressor,
485            chunk_size: 1024 * 1024, // 1MB chunks
486            num_threads: scirs2_core::parallel_ops::num_threads(),
487        }
488    }
489
490    /// Set chunk size for parallel compression
491    pub fn with_chunk_size(mut self, chunk_size: usize) -> Self {
492        self.chunk_size = chunk_size;
493        self
494    }
495
496    /// Set number of threads for parallel compression
497    pub fn with_num_threads(mut self, num_threads: usize) -> Self {
498        self.num_threads = num_threads;
499        self
500    }
501
502    /// Compress large data in parallel chunks
503    pub fn compress_parallel(
504        &self,
505        data: &[u8],
506        algorithm: CompressionAlgorithm,
507        level: CompressionLevel,
508    ) -> Result<CompressionResult> {
509        if data.len() < self.chunk_size * 2 {
510            // For small data, use regular compression
511            return self.compressor.compress_data(data, algorithm, level);
512        }
513
514        let start_time = std::time::Instant::now();
515
516        // Split data into chunks
517        let num_chunks = (data.len() + self.chunk_size - 1) / self.chunk_size;
518        let chunks: Vec<&[u8]> = (0..num_chunks)
519            .map(|i| {
520                let start = i * self.chunk_size;
521                let end = (start + self.chunk_size).min(data.len());
522                &data[start..end]
523            })
524            .collect();
525
526        // Compress chunks in parallel using scirs2-core's parallel operations
527        use scirs2_core::parallel_ops::{IntoParallelIterator, ParallelIterator};
528
529        let compressed_chunks: Vec<_> = chunks
530            .into_par_iter()
531            .map(|chunk| {
532                self.compressor
533                    .compress_data(chunk, algorithm, level)
534                    .map(|result| result.data)
535            })
536            .collect::<Result<Vec<_>>>()?;
537
538        // Combine compressed chunks
539        let mut combined_data = Vec::new();
540        combined_data.extend_from_slice(&(compressed_chunks.len() as u64).to_le_bytes());
541
542        for chunk in &compressed_chunks {
543            combined_data.extend_from_slice(&(chunk.len() as u64).to_le_bytes());
544            combined_data.extend_from_slice(chunk);
545        }
546
547        let compression_time_ms = start_time.elapsed().as_millis() as u64;
548        let compressed_size = combined_data.len();
549        let ratio = if data.is_empty() {
550            1.0
551        } else {
552            compressed_size as f32 / data.len() as f32
553        };
554
555        Ok(CompressionResult {
556            data: combined_data,
557            algorithm,
558            level,
559            original_size: data.len(),
560            compressed_size,
561            ratio,
562            compression_time_ms,
563        })
564    }
565
566    /// Decompress parallel-compressed data
567    pub fn decompress_parallel(
568        &self,
569        compressed_data: &[u8],
570        algorithm: CompressionAlgorithm,
571    ) -> Result<DecompressionResult> {
572        if compressed_data.len() < 8 {
573            // Not parallel-compressed, use regular decompression
574            return self.compressor.decompress_data(compressed_data, algorithm);
575        }
576
577        let start_time = std::time::Instant::now();
578
579        // Read number of chunks
580        let num_chunks = u64::from_le_bytes(
581            compressed_data[0..8]
582                .try_into()
583                .expect("slice of 8 bytes should convert to [u8; 8]"),
584        ) as usize;
585        let mut offset = 8;
586
587        // Read chunk sizes and data
588        let mut chunks = Vec::with_capacity(num_chunks);
589        for _ in 0..num_chunks {
590            if offset + 8 > compressed_data.len() {
591                return Err(TorshError::InvalidArgument(
592                    "Invalid parallel-compressed data format".to_string(),
593                ));
594            }
595
596            let chunk_size = u64::from_le_bytes(
597                compressed_data[offset..offset + 8]
598                    .try_into()
599                    .expect("slice of 8 bytes should convert to [u8; 8]"),
600            ) as usize;
601            offset += 8;
602
603            if offset + chunk_size > compressed_data.len() {
604                return Err(TorshError::InvalidArgument(
605                    "Invalid chunk size in parallel-compressed data".to_string(),
606                ));
607            }
608
609            chunks.push(&compressed_data[offset..offset + chunk_size]);
610            offset += chunk_size;
611        }
612
613        // Decompress chunks in parallel
614        use scirs2_core::parallel_ops::{IntoParallelIterator, ParallelIterator};
615
616        let decompressed_chunks: Vec<_> = chunks
617            .into_par_iter()
618            .map(|chunk| {
619                self.compressor
620                    .decompress_data(chunk, algorithm)
621                    .map(|result| result.data)
622            })
623            .collect::<Result<Vec<_>>>()?;
624
625        // Combine decompressed chunks
626        let combined_data = decompressed_chunks.into_iter().flatten().collect();
627
628        let decompression_time_ms = start_time.elapsed().as_millis() as u64;
629
630        Ok(DecompressionResult {
631            data: combined_data,
632            algorithm,
633            decompression_time_ms,
634        })
635    }
636}
637
/// Compression statistics collector
///
/// Aggregates [`CompressionResult`]s recorded via `record`.
#[derive(Debug, Clone, Default)]
pub struct CompressionStats {
    /// Total input bytes across all recorded results
    pub total_compressed: usize,
    /// Total bytes after compression
    pub total_after_compression: usize,
    /// Total compression time across all recorded results
    pub total_time_ms: u64,
    /// Number of recorded results per algorithm
    pub algorithm_usage: HashMap<CompressionAlgorithm, u32>,
    /// Running average compression ratio per algorithm
    pub algorithm_ratios: HashMap<CompressionAlgorithm, f32>,
}
652
653impl CompressionStats {
654    /// Create new stats collector
655    pub fn new() -> Self {
656        Self::default()
657    }
658
659    /// Record compression result
660    pub fn record(&mut self, result: &CompressionResult) {
661        self.total_compressed += result.original_size;
662        self.total_after_compression += result.compressed_size;
663        self.total_time_ms += result.compression_time_ms;
664
665        *self.algorithm_usage.entry(result.algorithm).or_insert(0) += 1;
666
667        // Update rolling average of compression ratios
668        let current_ratio = self.algorithm_ratios.get(&result.algorithm).unwrap_or(&0.0);
669        let count = self.algorithm_usage[&result.algorithm] as f32;
670        let new_ratio = (current_ratio * (count - 1.0) + result.ratio) / count;
671        self.algorithm_ratios.insert(result.algorithm, new_ratio);
672    }
673
674    /// Get overall compression ratio
675    pub fn overall_ratio(&self) -> f32 {
676        if self.total_compressed == 0 {
677            1.0
678        } else {
679            self.total_after_compression as f32 / self.total_compressed as f32
680        }
681    }
682
683    /// Get space saved in bytes
684    pub fn space_saved(&self) -> usize {
685        self.total_compressed
686            .saturating_sub(self.total_after_compression)
687    }
688
689    /// Get space saved as percentage
690    pub fn space_saved_percent(&self) -> f32 {
691        if self.total_compressed == 0 {
692            0.0
693        } else {
694            (self.space_saved() as f32 / self.total_compressed as f32) * 100.0
695        }
696    }
697
698    /// Get most used algorithm
699    pub fn most_used_algorithm(&self) -> Option<CompressionAlgorithm> {
700        self.algorithm_usage
701            .iter()
702            .max_by_key(|(_, &count)| count)
703            .map(|(&algorithm, _)| algorithm)
704    }
705
706    /// Get best performing algorithm (by compression ratio)
707    pub fn best_performing_algorithm(&self) -> Option<CompressionAlgorithm> {
708        self.algorithm_ratios
709            .iter()
710            .min_by(|(_, &a), (_, &b)| a.partial_cmp(&b).unwrap_or(std::cmp::Ordering::Equal))
711            .map(|(&algorithm, _)| algorithm)
712    }
713}
714
#[cfg(test)]
mod tests {
    use super::*;

    // Builder methods should override each corresponding config field.
    #[test]
    fn test_compression_config() {
        let config = CompressionConfig::new()
            .with_algorithm(CompressionAlgorithm::Zstd)
            .with_level(CompressionLevel(8))
            .with_strategy(CompressionStrategy::Size);

        assert_eq!(config.default_algorithm, CompressionAlgorithm::Zstd);
        assert_eq!(config.default_level.0, 8);
        assert_eq!(config.strategy, CompressionStrategy::Size);
    }

    // A level above a codec's maximum must be clamped (gzip caps at 9,
    // brotli at 11) while codecs with higher ceilings keep the raw value.
    #[test]
    fn test_compression_level() {
        let level = CompressionLevel(15);

        assert_eq!(level.for_algorithm(CompressionAlgorithm::Gzip), 9);
        assert_eq!(level.for_algorithm(CompressionAlgorithm::Zstd), 15);
        assert_eq!(level.for_algorithm(CompressionAlgorithm::Brotli), 11);
    }

    // Highly repetitive input must shrink under gzip (ratio < 1.0).
    #[test]
    fn test_basic_compression() {
        let compressor = AdvancedCompressor::new();
        let test_data = "Hello, World! ".repeat(100);

        let result = compressor
            .compress_data(
                test_data.as_bytes(),
                CompressionAlgorithm::Gzip,
                CompressionLevel(6),
            )
            .unwrap();

        assert_eq!(result.algorithm, CompressionAlgorithm::Gzip);
        assert_eq!(result.original_size, test_data.len());
        assert!(result.compressed_size < result.original_size);
        assert!(result.ratio < 1.0);
    }

    // Compress then decompress must round-trip to the exact original bytes.
    #[test]
    fn test_decompression() {
        let compressor = AdvancedCompressor::new();
        let test_data = "This is test data for compression and decompression.".repeat(10);

        let compression_result = compressor
            .compress_data(
                test_data.as_bytes(),
                CompressionAlgorithm::Gzip,
                CompressionLevel(6),
            )
            .unwrap();

        let decompression_result = compressor
            .decompress_data(&compression_result.data, CompressionAlgorithm::Gzip)
            .unwrap();

        assert_eq!(decompression_result.data, test_data.as_bytes());
        assert_eq!(decompression_result.algorithm, CompressionAlgorithm::Gzip);
    }

    // Resource-level compression should pick the per-type algorithm and
    // achieve a strong ratio on repetitive text.
    #[test]
    fn test_resource_compression() {
        let compressor = AdvancedCompressor::new();

        let resource = Resource::new(
            "test.txt".to_string(),
            ResourceType::Text,
            "This is a test text file with some content that should compress well."
                .repeat(20)
                .as_bytes()
                .to_vec(),
        );

        let result = compressor.compress_resource(&resource).unwrap();

        // Text should compress well
        assert!(result.ratio < 0.5);
        assert_eq!(result.original_size, resource.data.len());
    }

    // Stats must aggregate totals, per-algorithm usage, and average ratios
    // across multiple recorded results.
    #[test]
    fn test_compression_stats() {
        let mut stats = CompressionStats::new();

        let result1 = CompressionResult {
            data: vec![0; 100],
            algorithm: CompressionAlgorithm::Gzip,
            level: CompressionLevel(6),
            original_size: 200,
            compressed_size: 100,
            ratio: 0.5,
            compression_time_ms: 10,
        };

        let result2 = CompressionResult {
            data: vec![0; 80],
            algorithm: CompressionAlgorithm::Zstd,
            level: CompressionLevel(6),
            original_size: 200,
            compressed_size: 80,
            ratio: 0.4,
            compression_time_ms: 8,
        };

        stats.record(&result1);
        stats.record(&result2);

        assert_eq!(stats.total_compressed, 400);
        assert_eq!(stats.total_after_compression, 180);
        assert_eq!(stats.space_saved(), 220);
        assert!((stats.space_saved_percent() - 55.0).abs() < 0.1);

        assert_eq!(stats.algorithm_usage[&CompressionAlgorithm::Gzip], 1);
        assert_eq!(stats.algorithm_usage[&CompressionAlgorithm::Zstd], 1);

        // Zstd has the smaller (better) ratio of the two.
        assert_eq!(
            stats.best_performing_algorithm(),
            Some(CompressionAlgorithm::Zstd)
        );
    }

    // Payloads under min_size_threshold must be stored verbatim with
    // algorithm None and ratio exactly 1.0.
    #[test]
    fn test_small_file_skip() {
        let compressor = AdvancedCompressor::new();
        let small_data = b"tiny";

        // Create a small resource to test the size threshold
        let small_resource = Resource::new(
            "small.txt".to_string(),
            ResourceType::Text,
            small_data.to_vec(),
        );

        let result = compressor.compress_resource(&small_resource).unwrap();

        // Small files should not be compressed (below threshold)
        assert_eq!(result.algorithm, CompressionAlgorithm::None);
        assert_eq!(result.data, small_data);
        assert_eq!(result.ratio, 1.0);
    }

    // Benchmarking must return per-algorithm results sorted best-ratio-first.
    #[test]
    fn test_benchmark_algorithms() {
        let compressor = AdvancedCompressor::new();
        let test_data = "This is benchmark data. ".repeat(100);

        let results = compressor
            .benchmark_algorithms(test_data.as_bytes())
            .unwrap();

        // Should have results for multiple algorithms
        assert!(results.len() >= 2);

        // Results should be sorted by compression ratio (ascending = best first)
        for i in 1..results.len() {
            assert!(results[i - 1].ratio <= results[i].ratio);
        }

        // All should have compressed the same original data
        for result in &results {
            assert_eq!(result.original_size, test_data.len());
        }
    }
}