// torsh_package/compression.rs
1//! Advanced compression algorithms for packages
2//!
3//! This module provides multiple compression algorithms optimized for different
4//! types of data commonly found in ML packages including models, source code,
5//! and configuration files.
6
7use std::collections::HashMap;
8
9use serde::{Deserialize, Serialize};
10use torsh_core::error::{Result, TorshError};
11
12use crate::resources::{Resource, ResourceType};
13
/// Supported compression algorithms
///
/// NOTE(review): `Brotli` and `Lz4` currently fall back to gzip inside
/// `AdvancedCompressor` (see `compress_brotli` / `compress_lz4`), so data
/// tagged with those variants round-trips only through this module — confirm
/// before exchanging such streams with external tools.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CompressionAlgorithm {
    /// No compression
    None,
    /// Gzip compression (fast, decent compression)
    Gzip,
    /// Zstandard compression (excellent speed/ratio tradeoff)
    Zstd,
    /// LZMA compression (high compression ratio, slower)
    Lzma,
    /// Brotli compression (good for text/JSON)
    Brotli,
    /// LZ4 compression (extremely fast, lower ratio)
    Lz4,
}
30
/// Compression level (0-22 depending on algorithm)
///
/// The raw value is clamped to each backend's valid range by
/// [`CompressionLevel::for_algorithm`]; out-of-range values never error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct CompressionLevel(pub u32);
34
/// Compression strategy for different data types
///
/// Mapped to concrete levels by `CompressionConfig::level_for_strategy`;
/// `Adaptive` additionally consults resource type and size in
/// `AdvancedCompressor::adaptive_level`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionStrategy {
    /// Optimize for speed (level 1)
    Speed,
    /// Optimize for size (level 9)
    Size,
    /// Balanced speed/size (level 6)
    Balanced,
    /// Adaptive based on data type
    Adaptive,
}
47
/// Compression configuration
#[derive(Debug, Clone)]
pub struct CompressionConfig {
    /// Default algorithm to use when no per-type override matches
    pub default_algorithm: CompressionAlgorithm,
    /// Default compression level (used by the `Adaptive` strategy path)
    pub default_level: CompressionLevel,
    /// Compression strategy
    pub strategy: CompressionStrategy,
    /// Per-resource-type algorithm overrides
    pub algorithm_overrides: HashMap<ResourceType, CompressionAlgorithm>,
    /// Minimum size threshold for compression (bytes); smaller payloads are
    /// stored uncompressed by `AdvancedCompressor::compress_resource`
    pub min_size_threshold: usize,
    /// Maximum size for in-memory compression
    /// NOTE(review): not enforced anywhere in this file — confirm intended use
    pub max_memory_size: usize,
    /// Enable parallel compression for large resources
    /// NOTE(review): not read by `AdvancedCompressor`; parallel chunking is
    /// opt-in via `ParallelCompressor` — confirm whether this flag should gate it
    pub parallel_compression: bool,
}
66
/// Advanced compressor with multiple algorithm support
///
/// Stateless apart from its configuration; safe to reuse across calls.
pub struct AdvancedCompressor {
    /// Algorithm/level selection policy applied to each resource
    config: CompressionConfig,
}
71
/// Compression result with metadata
#[derive(Debug, Clone)]
pub struct CompressionResult {
    /// Compressed data
    pub data: Vec<u8>,
    /// Algorithm used (`None` when the input was below the size threshold)
    pub algorithm: CompressionAlgorithm,
    /// Compression level used
    pub level: CompressionLevel,
    /// Original size
    pub original_size: usize,
    /// Compressed size
    pub compressed_size: usize,
    /// Compression ratio (compressed/original); values < 1.0 mean savings,
    /// 1.0 is reported for empty or uncompressed input
    pub ratio: f32,
    /// Compression time in milliseconds (wall clock)
    pub compression_time_ms: u64,
}
90
/// Decompression result
#[derive(Debug, Clone)]
pub struct DecompressionResult {
    /// Decompressed data
    pub data: Vec<u8>,
    /// Algorithm used for compression (echoed from the caller's argument)
    pub algorithm: CompressionAlgorithm,
    /// Decompression time in milliseconds (wall clock)
    pub decompression_time_ms: u64,
}
101
102impl Default for CompressionLevel {
103    fn default() -> Self {
104        CompressionLevel(6)
105    }
106}
107
108impl CompressionLevel {
109    /// Create a new compression level
110    pub fn new(level: u32) -> Self {
111        CompressionLevel(level)
112    }
113
114    /// Get level for specific algorithm (clamps to valid range)
115    pub fn for_algorithm(&self, algorithm: CompressionAlgorithm) -> u32 {
116        match algorithm {
117            CompressionAlgorithm::None => 0,
118            CompressionAlgorithm::Gzip => self.0.min(9),
119            CompressionAlgorithm::Zstd => self.0.min(22),
120            CompressionAlgorithm::Lzma => self.0.min(9),
121            CompressionAlgorithm::Brotli => self.0.min(11),
122            CompressionAlgorithm::Lz4 => self.0.min(16),
123        }
124    }
125}
126
127impl Default for CompressionConfig {
128    fn default() -> Self {
129        let mut algorithm_overrides = HashMap::new();
130
131        // Text files compress well with Brotli
132        algorithm_overrides.insert(ResourceType::Source, CompressionAlgorithm::Brotli);
133        algorithm_overrides.insert(ResourceType::Config, CompressionAlgorithm::Brotli);
134        algorithm_overrides.insert(ResourceType::Documentation, CompressionAlgorithm::Brotli);
135        algorithm_overrides.insert(ResourceType::Text, CompressionAlgorithm::Brotli);
136        algorithm_overrides.insert(ResourceType::Metadata, CompressionAlgorithm::Brotli);
137
138        // Binary data works well with Zstandard
139        algorithm_overrides.insert(ResourceType::Model, CompressionAlgorithm::Zstd);
140        algorithm_overrides.insert(ResourceType::Data, CompressionAlgorithm::Zstd);
141        algorithm_overrides.insert(ResourceType::Binary, CompressionAlgorithm::Zstd);
142
143        Self {
144            default_algorithm: CompressionAlgorithm::Zstd,
145            default_level: CompressionLevel::default(),
146            strategy: CompressionStrategy::Balanced,
147            algorithm_overrides,
148            min_size_threshold: 256, // Don't compress files smaller than 256 bytes
149            max_memory_size: 100 * 1024 * 1024, // 100MB
150            parallel_compression: true,
151        }
152    }
153}
154
155impl CompressionConfig {
156    /// Create new compression config
157    pub fn new() -> Self {
158        Self::default()
159    }
160
161    /// Set default algorithm
162    pub fn with_algorithm(mut self, algorithm: CompressionAlgorithm) -> Self {
163        self.default_algorithm = algorithm;
164        self
165    }
166
167    /// Set default compression level
168    pub fn with_level(mut self, level: CompressionLevel) -> Self {
169        self.default_level = level;
170        self
171    }
172
173    /// Set compression strategy
174    pub fn with_strategy(mut self, strategy: CompressionStrategy) -> Self {
175        self.strategy = strategy;
176        self
177    }
178
179    /// Set minimum size threshold
180    pub fn with_min_threshold(mut self, threshold: usize) -> Self {
181        self.min_size_threshold = threshold;
182        self
183    }
184
185    /// Enable/disable parallel compression
186    pub fn with_parallel(mut self, parallel: bool) -> Self {
187        self.parallel_compression = parallel;
188        self
189    }
190
191    /// Get algorithm for resource type
192    pub fn algorithm_for_resource(&self, resource_type: ResourceType) -> CompressionAlgorithm {
193        self.algorithm_overrides
194            .get(&resource_type)
195            .copied()
196            .unwrap_or(self.default_algorithm)
197    }
198
199    /// Get compression level adjusted for strategy
200    pub fn level_for_strategy(&self, strategy: CompressionStrategy) -> CompressionLevel {
201        match strategy {
202            CompressionStrategy::Speed => CompressionLevel(1),
203            CompressionStrategy::Size => CompressionLevel(9),
204            CompressionStrategy::Balanced => CompressionLevel(6),
205            CompressionStrategy::Adaptive => self.default_level,
206        }
207    }
208}
209
impl AdvancedCompressor {
    /// Create new compressor with default config
    pub fn new() -> Self {
        Self {
            config: CompressionConfig::default(),
        }
    }

    /// Create compressor with custom config
    pub fn with_config(config: CompressionConfig) -> Self {
        Self { config }
    }

    /// Compress a resource
    ///
    /// Payloads smaller than `config.min_size_threshold` are returned as-is
    /// (algorithm `None`, ratio 1.0). Otherwise the algorithm comes from the
    /// per-resource-type overrides and the level from the configured strategy
    /// (`Adaptive` derives it from the resource's type and size).
    ///
    /// NOTE(review): `config.max_memory_size` and `config.parallel_compression`
    /// are not consulted here — parallel chunking lives in `ParallelCompressor`.
    ///
    /// # Errors
    /// Returns an error if the selected compression backend fails.
    pub fn compress_resource(&self, resource: &Resource) -> Result<CompressionResult> {
        // Check size threshold: tiny payloads usually grow when compressed.
        if resource.data.len() < self.config.min_size_threshold {
            return Ok(CompressionResult {
                data: resource.data.clone(),
                algorithm: CompressionAlgorithm::None,
                level: CompressionLevel(0),
                original_size: resource.data.len(),
                compressed_size: resource.data.len(),
                ratio: 1.0,
                compression_time_ms: 0,
            });
        }

        // Select algorithm and level
        let algorithm = self.config.algorithm_for_resource(resource.resource_type);
        let level = match self.config.strategy {
            CompressionStrategy::Adaptive => self.adaptive_level(resource),
            strategy => self.config.level_for_strategy(strategy),
        };

        self.compress_data(&resource.data, algorithm, level)
    }

    /// Compress raw data with specific algorithm and level
    ///
    /// The level is clamped to the algorithm's valid range via
    /// `CompressionLevel::for_algorithm`. The returned result carries the
    /// original/compressed sizes, the ratio (1.0 for empty input), and the
    /// elapsed wall-clock time.
    ///
    /// # Errors
    /// Propagates backend failures as `TorshError::SerializationError`.
    pub fn compress_data(
        &self,
        data: &[u8],
        algorithm: CompressionAlgorithm,
        level: CompressionLevel,
    ) -> Result<CompressionResult> {
        let start_time = std::time::Instant::now();

        let compressed_data = match algorithm {
            CompressionAlgorithm::None => data.to_vec(),
            CompressionAlgorithm::Gzip => {
                self.compress_gzip(data, level.for_algorithm(algorithm))?
            }
            CompressionAlgorithm::Zstd => {
                self.compress_zstd(data, level.for_algorithm(algorithm))?
            }
            CompressionAlgorithm::Lzma => {
                self.compress_lzma(data, level.for_algorithm(algorithm))?
            }
            CompressionAlgorithm::Brotli => {
                self.compress_brotli(data, level.for_algorithm(algorithm))?
            }
            CompressionAlgorithm::Lz4 => self.compress_lz4(data, level.for_algorithm(algorithm))?,
        };

        let compression_time_ms = start_time.elapsed().as_millis() as u64;
        // Guard against division by zero for empty input.
        let ratio = if data.is_empty() {
            1.0
        } else {
            compressed_data.len() as f32 / data.len() as f32
        };

        let compressed_size = compressed_data.len();

        Ok(CompressionResult {
            data: compressed_data,
            algorithm,
            level,
            original_size: data.len(),
            compressed_size,
            ratio,
            compression_time_ms,
        })
    }

    /// Decompress data
    ///
    /// The caller must supply the same `algorithm` that produced
    /// `compressed_data`; no format detection is performed.
    ///
    /// # Errors
    /// Propagates backend failures as `TorshError::SerializationError`.
    pub fn decompress_data(
        &self,
        compressed_data: &[u8],
        algorithm: CompressionAlgorithm,
    ) -> Result<DecompressionResult> {
        let start_time = std::time::Instant::now();

        let decompressed_data = match algorithm {
            CompressionAlgorithm::None => compressed_data.to_vec(),
            CompressionAlgorithm::Gzip => self.decompress_gzip(compressed_data)?,
            CompressionAlgorithm::Zstd => self.decompress_zstd(compressed_data)?,
            CompressionAlgorithm::Lzma => self.decompress_lzma(compressed_data)?,
            CompressionAlgorithm::Brotli => self.decompress_brotli(compressed_data)?,
            CompressionAlgorithm::Lz4 => self.decompress_lz4(compressed_data)?,
        };

        let decompression_time_ms = start_time.elapsed().as_millis() as u64;

        Ok(DecompressionResult {
            data: decompressed_data,
            algorithm,
            decompression_time_ms,
        })
    }

    /// Benchmark compression algorithms for given data
    ///
    /// Runs every algorithm except `None` at a fixed level of 6 and returns
    /// the results sorted by ratio ascending, i.e. best compression first.
    ///
    /// # Errors
    /// Fails if any single algorithm fails to compress the data.
    pub fn benchmark_algorithms(&self, data: &[u8]) -> Result<Vec<CompressionResult>> {
        let algorithms = [
            CompressionAlgorithm::Gzip,
            CompressionAlgorithm::Zstd,
            CompressionAlgorithm::Lzma,
            CompressionAlgorithm::Brotli,
            CompressionAlgorithm::Lz4,
        ];

        let mut results = Vec::new();

        for algorithm in &algorithms {
            let result = self.compress_data(data, *algorithm, CompressionLevel(6))?;
            results.push(result);
        }

        // Sort by compression ratio (best first); ratios are finite here, but
        // fall back to Equal rather than panicking on NaN.
        results.sort_by(|a, b| {
            a.ratio
                .partial_cmp(&b.ratio)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(results)
    }

    /// Adaptively choose compression level based on resource characteristics
    ///
    /// Heuristic: large binaries get fast levels, small binaries and text get
    /// high levels; anything unrecognized falls through to balanced (6).
    fn adaptive_level(&self, resource: &Resource) -> CompressionLevel {
        let data_size = resource.data.len();

        match resource.resource_type {
            ResourceType::Model | ResourceType::Binary => {
                // For model files, balance speed and compression based on size
                if data_size > 10 * 1024 * 1024 {
                    // > 10MB
                    CompressionLevel(3) // Fast compression for large files
                } else if data_size > 1024 * 1024 {
                    // > 1MB
                    CompressionLevel(6) // Balanced
                } else {
                    CompressionLevel(9) // High compression for smaller files
                }
            }
            ResourceType::Source | ResourceType::Config | ResourceType::Documentation => {
                // Text files compress well, so we can afford higher levels
                CompressionLevel(8)
            }
            ResourceType::Text | ResourceType::Metadata => {
                // JSON/text data often compresses very well
                CompressionLevel(7)
            }
            _ => CompressionLevel(6), // Default balanced
        }
    }

    /// Compress with Gzip
    ///
    /// `level` is expected to be pre-clamped to 0-9; the cast to `u8` is safe
    /// for that range.
    fn compress_gzip(&self, data: &[u8], level: u32) -> Result<Vec<u8>> {
        use oxiarc_deflate::gzip::gzip_compress;

        gzip_compress(data, level as u8)
            .map_err(|e| TorshError::SerializationError(format!("Gzip compression failed: {}", e)))
    }

    /// Decompress Gzip
    fn decompress_gzip(&self, data: &[u8]) -> Result<Vec<u8>> {
        use oxiarc_deflate::gzip::gzip_decompress;

        gzip_decompress(data).map_err(|e| {
            TorshError::SerializationError(format!("Gzip decompression failed: {}", e))
        })
    }

    /// Compress with Zstandard
    fn compress_zstd(&self, data: &[u8], level: u32) -> Result<Vec<u8>> {
        use oxiarc_zstd::encode_all;

        encode_all(data, level as i32).map_err(|e| {
            TorshError::SerializationError(format!("Zstandard compression failed: {}", e))
        })
    }

    /// Decompress Zstandard
    fn decompress_zstd(&self, data: &[u8]) -> Result<Vec<u8>> {
        use oxiarc_zstd::decode_all;

        decode_all(data).map_err(|e| {
            TorshError::SerializationError(format!("Zstandard decompression failed: {}", e))
        })
    }

    /// Compress with LZMA
    ///
    /// NOTE(review): `lzma_rs::lzma_compress` exposes no level knob, so the
    /// requested level is ignored here.
    fn compress_lzma(&self, data: &[u8], _level: u32) -> Result<Vec<u8>> {
        let mut output = Vec::new();
        lzma_rs::lzma_compress(&mut std::io::Cursor::new(data), &mut output).map_err(|e| {
            TorshError::SerializationError(format!("LZMA compression failed: {}", e))
        })?;

        Ok(output)
    }

    /// Decompress LZMA
    fn decompress_lzma(&self, data: &[u8]) -> Result<Vec<u8>> {
        let mut output = Vec::new();
        lzma_rs::lzma_decompress(&mut std::io::Cursor::new(data), &mut output).map_err(|e| {
            TorshError::SerializationError(format!("LZMA decompression failed: {}", e))
        })?;

        Ok(output)
    }

    /// Compress with Brotli
    ///
    /// NOTE(review): gzip stand-in — the output is a gzip stream, NOT real
    /// Brotli, even though results are tagged `CompressionAlgorithm::Brotli`.
    /// Round-trips only via `decompress_brotli` below.
    fn compress_brotli(&self, data: &[u8], level: u32) -> Result<Vec<u8>> {
        // For now, fall back to gzip since brotli might not be available
        // In a real implementation, you would add brotli dependency and use it
        self.compress_gzip(data, level.min(9))
    }

    /// Decompress Brotli
    ///
    /// NOTE(review): gzip stand-in matching `compress_brotli`.
    fn decompress_brotli(&self, data: &[u8]) -> Result<Vec<u8>> {
        // For now, fall back to gzip since brotli might not be available
        // In a real implementation, you would add brotli dependency and use it
        self.decompress_gzip(data)
    }

    /// Compress with LZ4
    ///
    /// NOTE(review): gzip stand-in — output is a gzip stream tagged `Lz4`.
    /// Round-trips only via `decompress_lz4` below.
    fn compress_lz4(&self, data: &[u8], _level: u32) -> Result<Vec<u8>> {
        // For now, fall back to gzip since lz4 might not be available
        // In a real implementation, you would add lz4 dependency and use it
        self.compress_gzip(data, 1) // Use fast compression as LZ4 is meant to be fast
    }

    /// Decompress LZ4
    ///
    /// NOTE(review): gzip stand-in matching `compress_lz4`.
    fn decompress_lz4(&self, data: &[u8]) -> Result<Vec<u8>> {
        // For now, fall back to gzip since lz4 might not be available
        // In a real implementation, you would add lz4 dependency and use it
        self.decompress_gzip(data)
    }
}
459
460impl Default for AdvancedCompressor {
461    fn default() -> Self {
462        Self::new()
463    }
464}
465
/// Parallel compression utilities for large resources
///
/// Splits the input into fixed-size chunks, compresses them concurrently, and
/// frames the output with a chunk count plus per-chunk length headers.
pub struct ParallelCompressor {
    /// Underlying compressor applied to each chunk
    compressor: AdvancedCompressor,
    /// Chunk size in bytes (default 1MB)
    chunk_size: usize,
    /// Requested worker count
    /// NOTE(review): stored but never read by the methods below, which use
    /// the global parallel pool — confirm whether it should be honored
    num_threads: usize,
}
472
473impl ParallelCompressor {
474    /// Create new parallel compressor
475    pub fn new(compressor: AdvancedCompressor) -> Self {
476        Self {
477            compressor,
478            chunk_size: 1024 * 1024, // 1MB chunks
479            num_threads: scirs2_core::parallel_ops::num_threads(),
480        }
481    }
482
483    /// Set chunk size for parallel compression
484    pub fn with_chunk_size(mut self, chunk_size: usize) -> Self {
485        self.chunk_size = chunk_size;
486        self
487    }
488
489    /// Set number of threads for parallel compression
490    pub fn with_num_threads(mut self, num_threads: usize) -> Self {
491        self.num_threads = num_threads;
492        self
493    }
494
495    /// Compress large data in parallel chunks
496    pub fn compress_parallel(
497        &self,
498        data: &[u8],
499        algorithm: CompressionAlgorithm,
500        level: CompressionLevel,
501    ) -> Result<CompressionResult> {
502        if data.len() < self.chunk_size * 2 {
503            // For small data, use regular compression
504            return self.compressor.compress_data(data, algorithm, level);
505        }
506
507        let start_time = std::time::Instant::now();
508
509        // Split data into chunks
510        let num_chunks = (data.len() + self.chunk_size - 1) / self.chunk_size;
511        let chunks: Vec<&[u8]> = (0..num_chunks)
512            .map(|i| {
513                let start = i * self.chunk_size;
514                let end = (start + self.chunk_size).min(data.len());
515                &data[start..end]
516            })
517            .collect();
518
519        // Compress chunks in parallel using scirs2-core's parallel operations
520        use scirs2_core::parallel_ops::{IntoParallelIterator, ParallelIterator};
521
522        let compressed_chunks: Vec<_> = chunks
523            .into_par_iter()
524            .map(|chunk| {
525                self.compressor
526                    .compress_data(chunk, algorithm, level)
527                    .map(|result| result.data)
528            })
529            .collect::<Result<Vec<_>>>()?;
530
531        // Combine compressed chunks
532        let mut combined_data = Vec::new();
533        combined_data.extend_from_slice(&(compressed_chunks.len() as u64).to_le_bytes());
534
535        for chunk in &compressed_chunks {
536            combined_data.extend_from_slice(&(chunk.len() as u64).to_le_bytes());
537            combined_data.extend_from_slice(chunk);
538        }
539
540        let compression_time_ms = start_time.elapsed().as_millis() as u64;
541        let compressed_size = combined_data.len();
542        let ratio = if data.is_empty() {
543            1.0
544        } else {
545            compressed_size as f32 / data.len() as f32
546        };
547
548        Ok(CompressionResult {
549            data: combined_data,
550            algorithm,
551            level,
552            original_size: data.len(),
553            compressed_size,
554            ratio,
555            compression_time_ms,
556        })
557    }
558
559    /// Decompress parallel-compressed data
560    pub fn decompress_parallel(
561        &self,
562        compressed_data: &[u8],
563        algorithm: CompressionAlgorithm,
564    ) -> Result<DecompressionResult> {
565        if compressed_data.len() < 8 {
566            // Not parallel-compressed, use regular decompression
567            return self.compressor.decompress_data(compressed_data, algorithm);
568        }
569
570        let start_time = std::time::Instant::now();
571
572        // Read number of chunks
573        let num_chunks = u64::from_le_bytes(
574            compressed_data[0..8]
575                .try_into()
576                .expect("slice of 8 bytes should convert to [u8; 8]"),
577        ) as usize;
578        let mut offset = 8;
579
580        // Read chunk sizes and data
581        let mut chunks = Vec::with_capacity(num_chunks);
582        for _ in 0..num_chunks {
583            if offset + 8 > compressed_data.len() {
584                return Err(TorshError::InvalidArgument(
585                    "Invalid parallel-compressed data format".to_string(),
586                ));
587            }
588
589            let chunk_size = u64::from_le_bytes(
590                compressed_data[offset..offset + 8]
591                    .try_into()
592                    .expect("slice of 8 bytes should convert to [u8; 8]"),
593            ) as usize;
594            offset += 8;
595
596            if offset + chunk_size > compressed_data.len() {
597                return Err(TorshError::InvalidArgument(
598                    "Invalid chunk size in parallel-compressed data".to_string(),
599                ));
600            }
601
602            chunks.push(&compressed_data[offset..offset + chunk_size]);
603            offset += chunk_size;
604        }
605
606        // Decompress chunks in parallel
607        use scirs2_core::parallel_ops::{IntoParallelIterator, ParallelIterator};
608
609        let decompressed_chunks: Vec<_> = chunks
610            .into_par_iter()
611            .map(|chunk| {
612                self.compressor
613                    .decompress_data(chunk, algorithm)
614                    .map(|result| result.data)
615            })
616            .collect::<Result<Vec<_>>>()?;
617
618        // Combine decompressed chunks
619        let combined_data = decompressed_chunks.into_iter().flatten().collect();
620
621        let decompression_time_ms = start_time.elapsed().as_millis() as u64;
622
623        Ok(DecompressionResult {
624            data: combined_data,
625            algorithm,
626            decompression_time_ms,
627        })
628    }
629}
630
/// Compression statistics collector
#[derive(Debug, Clone, Default)]
pub struct CompressionStats {
    /// Total bytes compressed (sum of ORIGINAL sizes, per `record`)
    pub total_compressed: usize,
    /// Total bytes after compression
    pub total_after_compression: usize,
    /// Total compression time
    pub total_time_ms: u64,
    /// Algorithm usage statistics (number of `record` calls per algorithm)
    pub algorithm_usage: HashMap<CompressionAlgorithm, u32>,
    /// Average compression ratios by algorithm (incremental mean)
    pub algorithm_ratios: HashMap<CompressionAlgorithm, f32>,
}
645
646impl CompressionStats {
647    /// Create new stats collector
648    pub fn new() -> Self {
649        Self::default()
650    }
651
652    /// Record compression result
653    pub fn record(&mut self, result: &CompressionResult) {
654        self.total_compressed += result.original_size;
655        self.total_after_compression += result.compressed_size;
656        self.total_time_ms += result.compression_time_ms;
657
658        *self.algorithm_usage.entry(result.algorithm).or_insert(0) += 1;
659
660        // Update rolling average of compression ratios
661        let current_ratio = self.algorithm_ratios.get(&result.algorithm).unwrap_or(&0.0);
662        let count = self.algorithm_usage[&result.algorithm] as f32;
663        let new_ratio = (current_ratio * (count - 1.0) + result.ratio) / count;
664        self.algorithm_ratios.insert(result.algorithm, new_ratio);
665    }
666
667    /// Get overall compression ratio
668    pub fn overall_ratio(&self) -> f32 {
669        if self.total_compressed == 0 {
670            1.0
671        } else {
672            self.total_after_compression as f32 / self.total_compressed as f32
673        }
674    }
675
676    /// Get space saved in bytes
677    pub fn space_saved(&self) -> usize {
678        self.total_compressed
679            .saturating_sub(self.total_after_compression)
680    }
681
682    /// Get space saved as percentage
683    pub fn space_saved_percent(&self) -> f32 {
684        if self.total_compressed == 0 {
685            0.0
686        } else {
687            (self.space_saved() as f32 / self.total_compressed as f32) * 100.0
688        }
689    }
690
691    /// Get most used algorithm
692    pub fn most_used_algorithm(&self) -> Option<CompressionAlgorithm> {
693        self.algorithm_usage
694            .iter()
695            .max_by_key(|(_, &count)| count)
696            .map(|(&algorithm, _)| algorithm)
697    }
698
699    /// Get best performing algorithm (by compression ratio)
700    pub fn best_performing_algorithm(&self) -> Option<CompressionAlgorithm> {
701        self.algorithm_ratios
702            .iter()
703            .min_by(|(_, &a), (_, &b)| a.partial_cmp(&b).unwrap_or(std::cmp::Ordering::Equal))
704            .map(|(&algorithm, _)| algorithm)
705    }
706}
707
#[cfg(test)]
mod tests {
    use super::*;

    // Builder methods should overwrite the corresponding defaults.
    #[test]
    fn test_compression_config() {
        let config = CompressionConfig::new()
            .with_algorithm(CompressionAlgorithm::Zstd)
            .with_level(CompressionLevel(8))
            .with_strategy(CompressionStrategy::Size);

        assert_eq!(config.default_algorithm, CompressionAlgorithm::Zstd);
        assert_eq!(config.default_level.0, 8);
        assert_eq!(config.strategy, CompressionStrategy::Size);
    }

    // Levels clamp to each algorithm's maximum (gzip 9, zstd 22, brotli 11).
    #[test]
    fn test_compression_level() {
        let level = CompressionLevel(15);

        assert_eq!(level.for_algorithm(CompressionAlgorithm::Gzip), 9);
        assert_eq!(level.for_algorithm(CompressionAlgorithm::Zstd), 15);
        assert_eq!(level.for_algorithm(CompressionAlgorithm::Brotli), 11);
    }

    // Highly repetitive input must shrink under gzip (ratio < 1.0).
    #[test]
    fn test_basic_compression() {
        let compressor = AdvancedCompressor::new();
        let test_data = "Hello, World! ".repeat(100);

        let result = compressor
            .compress_data(
                test_data.as_bytes(),
                CompressionAlgorithm::Gzip,
                CompressionLevel(6),
            )
            .unwrap();

        assert_eq!(result.algorithm, CompressionAlgorithm::Gzip);
        assert_eq!(result.original_size, test_data.len());
        assert!(result.compressed_size < result.original_size);
        assert!(result.ratio < 1.0);
    }

    // Gzip round-trip: decompress(compress(x)) == x.
    #[test]
    fn test_decompression() {
        let compressor = AdvancedCompressor::new();
        let test_data = "This is test data for compression and decompression.".repeat(10);

        let compression_result = compressor
            .compress_data(
                test_data.as_bytes(),
                CompressionAlgorithm::Gzip,
                CompressionLevel(6),
            )
            .unwrap();

        let decompression_result = compressor
            .decompress_data(&compression_result.data, CompressionAlgorithm::Gzip)
            .unwrap();

        assert_eq!(decompression_result.data, test_data.as_bytes());
        assert_eq!(decompression_result.algorithm, CompressionAlgorithm::Gzip);
    }

    // Resource-level path: a Text resource picks its override algorithm and
    // repetitive text should compress to under half size.
    #[test]
    fn test_resource_compression() {
        let compressor = AdvancedCompressor::new();

        let resource = Resource::new(
            "test.txt".to_string(),
            ResourceType::Text,
            "This is a test text file with some content that should compress well."
                .repeat(20)
                .as_bytes()
                .to_vec(),
        );

        let result = compressor.compress_resource(&resource).unwrap();

        // Text should compress well
        assert!(result.ratio < 0.5);
        assert_eq!(result.original_size, resource.data.len());
    }

    // Totals, averages, and best-algorithm selection across two records.
    #[test]
    fn test_compression_stats() {
        let mut stats = CompressionStats::new();

        let result1 = CompressionResult {
            data: vec![0; 100],
            algorithm: CompressionAlgorithm::Gzip,
            level: CompressionLevel(6),
            original_size: 200,
            compressed_size: 100,
            ratio: 0.5,
            compression_time_ms: 10,
        };

        let result2 = CompressionResult {
            data: vec![0; 80],
            algorithm: CompressionAlgorithm::Zstd,
            level: CompressionLevel(6),
            original_size: 200,
            compressed_size: 80,
            ratio: 0.4,
            compression_time_ms: 8,
        };

        stats.record(&result1);
        stats.record(&result2);

        assert_eq!(stats.total_compressed, 400);
        assert_eq!(stats.total_after_compression, 180);
        assert_eq!(stats.space_saved(), 220);
        assert!((stats.space_saved_percent() - 55.0).abs() < 0.1);

        assert_eq!(stats.algorithm_usage[&CompressionAlgorithm::Gzip], 1);
        assert_eq!(stats.algorithm_usage[&CompressionAlgorithm::Zstd], 1);

        assert_eq!(
            stats.best_performing_algorithm(),
            Some(CompressionAlgorithm::Zstd)
        );
    }

    // Inputs below min_size_threshold (default 256 bytes) are stored raw.
    #[test]
    fn test_small_file_skip() {
        let compressor = AdvancedCompressor::new();
        let small_data = b"tiny";

        // Create a small resource to test the size threshold
        let small_resource = Resource::new(
            "small.txt".to_string(),
            ResourceType::Text,
            small_data.to_vec(),
        );

        let result = compressor.compress_resource(&small_resource).unwrap();

        // Small files should not be compressed (below threshold)
        assert_eq!(result.algorithm, CompressionAlgorithm::None);
        assert_eq!(result.data, small_data);
        assert_eq!(result.ratio, 1.0);
    }

    // Benchmark results come back sorted by ratio ascending (best first).
    #[test]
    fn test_benchmark_algorithms() {
        let compressor = AdvancedCompressor::new();
        let test_data = "This is benchmark data. ".repeat(100);

        let results = compressor
            .benchmark_algorithms(test_data.as_bytes())
            .unwrap();

        // Should have results for multiple algorithms
        assert!(results.len() >= 2);

        // Results should be sorted by compression ratio
        for i in 1..results.len() {
            assert!(results[i - 1].ratio <= results[i].ratio);
        }

        // All should have compressed the same original data
        for result in &results {
            assert_eq!(result.original_size, test_data.len());
        }
    }
}