unity_asset_binary/
compression.rs

1//! Compression support for Unity binary files
2
3use crate::error::{BinaryError, Result};
4use flate2::read::GzDecoder;
5use std::io::Read;
6
/// Compression schemes that a Unity binary file may declare.
///
/// Each variant's discriminant is the numeric identifier that
/// [`CompressionType::from_flags`] maps to that variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompressionType {
    /// Data is stored uncompressed.
    None = 0,
    /// LZMA-compressed data.
    Lzma = 1,
    /// LZ4-compressed data.
    Lz4 = 2,
    /// LZ4HC (the "High Compression" LZ4 variant).
    Lz4Hc = 3,
    /// LZHAM-compressed data.
    Lzham = 4,
    /// Brotli-compressed data (WebGL builds).
    Brotli = 5,
}
23
24impl CompressionType {
25    /// Create compression type from magic number/flags
26    pub fn from_flags(flags: u32) -> Result<Self> {
27        match flags & 0x3F {
28            0 => Ok(CompressionType::None),
29            1 => Ok(CompressionType::Lzma),
30            2 => Ok(CompressionType::Lz4),
31            3 => Ok(CompressionType::Lz4Hc),
32            4 => Ok(CompressionType::Lzham),
33            5 => Ok(CompressionType::Brotli),
34            other => Err(BinaryError::unsupported_compression(format!(
35                "Unknown compression type: {}",
36                other
37            ))),
38        }
39    }
40
41    /// Check if this compression type is supported
42    pub fn is_supported(self) -> bool {
43        matches!(
44            self,
45            CompressionType::None
46                | CompressionType::Lz4
47                | CompressionType::Lz4Hc
48                | CompressionType::Lzma
49                | CompressionType::Brotli
50        )
51    }
52
53    /// Get the name of the compression type
54    pub fn name(self) -> &'static str {
55        match self {
56            CompressionType::None => "None",
57            CompressionType::Lzma => "LZMA",
58            CompressionType::Lz4 => "LZ4",
59            CompressionType::Lz4Hc => "LZ4HC",
60            CompressionType::Lzham => "LZHAM",
61            CompressionType::Brotli => "Brotli",
62        }
63    }
64}
65
66/// Decompress data based on compression type
67pub fn decompress(
68    data: &[u8],
69    compression: CompressionType,
70    uncompressed_size: usize,
71) -> Result<Vec<u8>> {
72    match compression {
73        CompressionType::None => {
74            // No compression, return data as-is
75            Ok(data.to_vec())
76        }
77        CompressionType::Lz4 | CompressionType::Lz4Hc => {
78            // LZ4 decompression
79            decompress_lz4(data, uncompressed_size)
80        }
81        CompressionType::Lzma => {
82            // LZMA decompression
83            decompress_lzma(data, uncompressed_size)
84        }
85        CompressionType::Lzham => {
86            // LZHAM decompression (not implemented yet)
87            Err(BinaryError::unsupported_compression(
88                "LZHAM compression not yet supported",
89            ))
90        }
91        CompressionType::Brotli => {
92            // Brotli decompression
93            decompress_brotli(data)
94        }
95    }
96}
97
98/// Decompress LZ4 compressed data (Unity uses block format, not frame format)
99fn decompress_lz4(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
100    // Unity uses LZ4 block format, not frame format
101    // This is the same as UnityPy's lz4.block.decompress
102
103    // Unity LZ4 data sometimes has size estimation issues
104    // Try with a larger buffer first to avoid size mismatch errors
105    let buffer_size = uncompressed_size
106        .checked_add(128)
107        .ok_or_else(|| BinaryError::invalid_data("LZ4 uncompressed_size overflow"))?; // Add padding for Unity's size estimation issues
108
109    match lz4_flex::decompress(data, buffer_size) {
110        Ok(decompressed) => {
111            // Check if the decompressed size is reasonable
112            let size_diff = if decompressed.len() > uncompressed_size {
113                decompressed.len() - uncompressed_size
114            } else {
115                uncompressed_size - decompressed.len()
116            };
117
118            if size_diff <= 128 {
119                // Allow up to 128 bytes difference (Unity padding/alignment)
120                Ok(decompressed)
121            } else {
122                Err(BinaryError::decompression_failed(format!(
123                    "LZ4 decompression size mismatch: expected {}, got {} (diff: {})",
124                    uncompressed_size,
125                    decompressed.len(),
126                    size_diff
127                )))
128            }
129        }
130        Err(e) => {
131            // If larger buffer fails, try with exact size as fallback
132            match lz4_flex::decompress(data, uncompressed_size) {
133                Ok(decompressed) => Ok(decompressed),
134                Err(_) => Err(BinaryError::decompression_failed(format!(
135                    "LZ4 block decompression failed: {}",
136                    e
137                ))),
138            }
139        }
140    }
141}
142
143/// Decompress LZMA compressed data (Unity uses LZMA1 format)
144fn decompress_lzma(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
145    // Unity uses LZMA format, try different approaches
146    if data.is_empty() {
147        return Err(BinaryError::invalid_data("LZMA data is empty".to_string()));
148    }
149
150    // Unity LZMA format analysis:
151    // Unity uses LZMA with specific header formats:
152    // Format 1: Standard LZMA with 13-byte header (5 bytes properties + 8 bytes size)
153    // Format 2: Unity custom LZMA with modified header
154    // Format 3: Raw LZMA stream without header
155
156    // Try Unity-specific LZMA decompression strategies
157    let result = try_unity_lzma_strategies(data, uncompressed_size);
158    if result.is_ok() {
159        return result;
160    }
161
162    Err(BinaryError::decompression_failed(format!(
163        "LZMA decompression failed with all strategies. Input size: {}, expected output: {}",
164        data.len(),
165        uncompressed_size
166    )))
167}
168
169/// Try Unity-specific LZMA decompression strategies
170fn try_unity_lzma_strategies(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
171    // Strategy 1: Try with Unity LZMA header format
172    if let Ok(result) = try_unity_lzma_with_header(data, uncompressed_size) {
173        return Ok(result);
174    }
175
176    // Strategy 2: Try Unity raw LZMA approach
177    if let Ok(result) = try_unity_raw_lzma(data, uncompressed_size) {
178        return Ok(result);
179    }
180
181    // Strategy 3: Try standard LZMA formats
182    let strategies = [
183        ("direct", data),
184        (
185            "skip_13_header",
186            if data.len() > 13 { &data[13..] } else { data },
187        ),
188        (
189            "skip_5_header",
190            if data.len() > 5 { &data[5..] } else { data },
191        ),
192        (
193            "skip_8_header",
194            if data.len() > 8 { &data[8..] } else { data },
195        ),
196        (
197            "unity_custom",
198            if data.len() > 9 { &data[9..] } else { data },
199        ),
200    ];
201
202    for (_strategy_name, test_data) in &strategies {
203        if test_data.is_empty() {
204            continue;
205        }
206
207        let mut output = Vec::new();
208        match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(test_data), &mut output) {
209            Ok(_) => {
210                // Check if size is reasonable
211                let size_ratio = output.len() as f64 / uncompressed_size as f64;
212                if (0.8..=1.2).contains(&size_ratio) {
213                    // Size is within 20% of expected, probably correct
214                    return Ok(output);
215                } else if output.len() == uncompressed_size {
216                    // Exact match
217                    return Ok(output);
218                }
219            }
220            Err(_e) => {
221                // Strategy failed, continue to next
222            }
223        }
224    }
225
226    Err(BinaryError::decompression_failed(
227        "All Unity LZMA strategies failed".to_string(),
228    ))
229}
230
231/// Try Unity LZMA with custom header parsing (based on UnityPy implementation)
232fn try_unity_lzma_with_header(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
233    if data.len() < 13 {
234        return Err(BinaryError::invalid_data(
235            "LZMA data too short for header".to_string(),
236        ));
237    }
238
239    // Unity LZMA header format (based on UnityPy):
240    // Bytes 0: props (LZMA properties byte)
241    // Bytes 1-4: dict_size (little-endian u32)
242    // Bytes 5-12: Uncompressed size (little-endian u64) - optional
243    // Rest: Compressed data
244
245    // Parse LZMA properties like UnityPy does
246    let props = data[0];
247    let dict_size = u32::from_le_bytes([data[1], data[2], data[3], data[4]]);
248
249    // Calculate LZMA parameters from props (UnityPy algorithm)
250    let _lc = props % 9;
251    let remainder = props / 9;
252    let _pb = remainder / 5;
253    let _lp = remainder % 5;
254
255    // Try different data offsets (with and without size header)
256    let offsets_to_try = [5, 13]; // 5 = no size header, 13 = with size header
257
258    for &data_offset in &offsets_to_try {
259        if data_offset >= data.len() {
260            continue;
261        }
262
263        let compressed_data = &data[data_offset..];
264
265        // Try UnityPy-style LZMA parameter calculation
266        let _lc = props % 9;
267        let remainder = props / 9;
268        let _pb = remainder / 5;
269        let _lp = remainder % 5;
270
271        // Try with calculated parameters (create custom LZMA header)
272        let mut unity_lzma_data = Vec::new();
273        unity_lzma_data.push(props);
274        unity_lzma_data.extend_from_slice(&dict_size.to_le_bytes());
275        unity_lzma_data.extend_from_slice(&(expected_size as u64).to_le_bytes());
276        unity_lzma_data.extend_from_slice(compressed_data);
277
278        let mut output = Vec::new();
279        match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(&unity_lzma_data), &mut output) {
280            Ok(_) => {
281                if output.len() == expected_size {
282                    return Ok(output);
283                } else if !output.is_empty() {
284                    let ratio = output.len() as f64 / expected_size as f64;
285                    if (0.8..=1.2).contains(&ratio) {
286                        return Ok(output);
287                    }
288                }
289            }
290            Err(_e) => {
291                // UnityPy params failed, continue
292            }
293        }
294
295        // Fallback: reconstruct standard LZMA header and try with lzma_rs
296        let mut lzma_data = Vec::new();
297        lzma_data.push(props);
298        lzma_data.extend_from_slice(&dict_size.to_le_bytes());
299        lzma_data.extend_from_slice(&(expected_size as u64).to_le_bytes());
300        lzma_data.extend_from_slice(compressed_data);
301
302        let mut output = Vec::new();
303        match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(&lzma_data), &mut output) {
304            Ok(_) => {
305                if output.len() == expected_size {
306                    return Ok(output);
307                } else if !output.is_empty() {
308                    let ratio = output.len() as f64 / expected_size as f64;
309                    if (0.8..=1.2).contains(&ratio) {
310                        return Ok(output);
311                    }
312                }
313            }
314            Err(_e) => {
315                // lzma_rs failed, continue
316            }
317        }
318    }
319
320    Err(BinaryError::decompression_failed(
321        "Unity LZMA header parsing failed".to_string(),
322    ))
323}
324
325/// Try Unity-specific LZMA decompression with raw data approach
326fn try_unity_raw_lzma(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
327    if data.len() < 13 {
328        return Err(BinaryError::invalid_data(
329            "Data too short for Unity LZMA".to_string(),
330        ));
331    }
332
333    // Unity sometimes stores LZMA data with a custom header format
334    // Try to extract the actual LZMA stream from various offsets
335    let offsets_to_try = [0, 5, 8, 9, 13, 16];
336
337    for &offset in &offsets_to_try {
338        if offset >= data.len() {
339            continue;
340        }
341
342        let lzma_stream = &data[offset..];
343        if lzma_stream.len() < 5 {
344            continue;
345        }
346
347        // Try to decompress as raw LZMA stream
348        let mut output = Vec::new();
349        match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(lzma_stream), &mut output) {
350            Ok(_) => {
351                // Check if size is reasonable
352                if output.len() == expected_size {
353                    return Ok(output);
354                } else if !output.is_empty() {
355                    let ratio = output.len() as f64 / expected_size as f64;
356                    if (0.5..=2.0).contains(&ratio) {
357                        return Ok(output);
358                    }
359                }
360            }
361            Err(_e) => {
362                // Raw LZMA failed, continue
363            }
364        }
365
366        // Try with reconstructed header
367        if lzma_stream.len() >= 5 {
368            let mut reconstructed = Vec::new();
369            reconstructed.extend_from_slice(&lzma_stream[0..5]); // Properties
370            reconstructed.extend_from_slice(&(expected_size as u64).to_le_bytes()); // Size
371            if lzma_stream.len() > 5 {
372                reconstructed.extend_from_slice(&lzma_stream[5..]); // Compressed data
373            }
374
375            let mut output = Vec::new();
376            match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(&reconstructed), &mut output) {
377                Ok(_) => {
378                    if output.len() == expected_size {
379                        return Ok(output);
380                    }
381                }
382                Err(e) => {
383                    let _ = e;
384                }
385            }
386        }
387    }
388
389    Err(BinaryError::decompression_failed(
390        "Unity raw LZMA failed".to_string(),
391    ))
392}
393
394/// Decompress Brotli compressed data (used in WebGL builds)
395pub fn decompress_brotli(data: &[u8]) -> Result<Vec<u8>> {
396    use std::io::Read;
397    let mut decompressed = Vec::new();
398    let mut decoder = brotli::Decompressor::new(data, 4096); // 4KB buffer size
399    match decoder.read_to_end(&mut decompressed) {
400        Ok(_) => Ok(decompressed),
401        Err(e) => Err(BinaryError::decompression_failed(format!(
402            "Brotli decompression failed: {}",
403            e
404        ))),
405    }
406}
407
408/// Decompress GZIP data (used in some Unity formats)
409pub fn decompress_gzip(data: &[u8]) -> Result<Vec<u8>> {
410    let mut decoder = GzDecoder::new(data);
411    let mut decompressed = Vec::new();
412    decoder.read_to_end(&mut decompressed).map_err(|e| {
413        BinaryError::decompression_failed(format!("GZIP decompression failed: {}", e))
414    })?;
415    Ok(decompressed)
416}
417
/// Size and flag metadata describing one compression block.
#[derive(Clone, Debug)]
pub struct CompressionBlock {
    /// Number of bytes the block occupies once decompressed.
    pub uncompressed_size: u32,
    /// Number of bytes the block occupies as stored.
    pub compressed_size: u32,
    /// Flags word; its low bits select the compression type.
    pub flags: u16,
}
428
429impl CompressionBlock {
430    /// Create a new compression block
431    pub fn new(uncompressed_size: u32, compressed_size: u32, flags: u16) -> Self {
432        Self {
433            uncompressed_size,
434            compressed_size,
435            flags,
436        }
437    }
438
439    /// Get the compression type for this block
440    pub fn compression_type(&self) -> Result<CompressionType> {
441        CompressionType::from_flags(self.flags as u32)
442    }
443
444    /// Check if this block is compressed
445    pub fn is_compressed(&self) -> bool {
446        self.uncompressed_size != self.compressed_size
447    }
448
449    /// Decompress the block data
450    pub fn decompress(&self, data: &[u8]) -> Result<Vec<u8>> {
451        if data.len() != self.compressed_size as usize {
452            return Err(BinaryError::invalid_data(format!(
453                "Block data size mismatch: expected {}, got {}",
454                self.compressed_size,
455                data.len()
456            )));
457        }
458
459        let compression = self.compression_type()?;
460        decompress(data, compression, self.uncompressed_size as usize)
461    }
462}
463
/// Bit constants for the flags word found in Unity bundle headers.
///
/// This is a unit struct used purely as a namespace for the constants below.
pub struct ArchiveFlags;

impl ArchiveFlags {
    /// Mask selecting the compression type (the low six bits).
    pub const COMPRESSION_TYPE_MASK: u32 = (1 << 6) - 1;
    /// Block info and directory info are stored combined (UnityFS).
    pub const BLOCKS_AND_DIRECTORY_INFO_COMBINED: u32 = 1 << 6;
    /// Block info is located at the end of the file (UnityFS).
    pub const BLOCK_INFO_AT_END: u32 = 1 << 7;
    /// Old web plugin compatibility.
    pub const OLD_WEB_PLUGIN_COMPATIBILITY: u32 = 1 << 8;
    /// Block info needs padding at its start.
    pub const BLOCK_INFO_NEEDS_PADDING_AT_START: u32 = 1 << 9;
}
479
#[cfg(test)]
mod tests {
    use super::*;

    /// The basic identifiers 0..=3 decode to the matching variants.
    #[test]
    fn test_compression_type_from_flags() {
        let expectations = [
            (0, CompressionType::None),
            (1, CompressionType::Lzma),
            (2, CompressionType::Lz4),
            (3, CompressionType::Lz4Hc),
        ];

        for (flags, expected) in expectations {
            assert_eq!(CompressionType::from_flags(flags).unwrap(), expected);
        }
    }

    /// Display names for a sample of variants.
    #[test]
    fn test_compression_type_names() {
        let expectations = [
            (CompressionType::None, "None"),
            (CompressionType::Lz4, "LZ4"),
            (CompressionType::Lzma, "LZMA"),
        ];

        for (kind, expected) in expectations {
            assert_eq!(kind.name(), expected);
        }
    }

    /// LZHAM is the only one of these variants reported as unsupported.
    #[test]
    fn test_compression_type_supported() {
        let supported = [
            CompressionType::None,
            CompressionType::Lz4,
            CompressionType::Lz4Hc,
            CompressionType::Lzma,
        ];

        for kind in supported {
            assert!(kind.is_supported());
        }
        assert!(!CompressionType::Lzham.is_supported());
    }

    /// `None` compression hands the input back unchanged.
    #[test]
    fn test_no_compression() {
        let data = b"Hello, World!";
        let result = decompress(data, CompressionType::None, data.len()).unwrap();
        assert_eq!(result, data);
    }

    /// A block with differing sizes is compressed; flags 2 mean LZ4.
    #[test]
    fn test_compression_block() {
        let lz4_block = CompressionBlock::new(100, 80, 2);
        assert!(lz4_block.is_compressed());
        assert_eq!(lz4_block.compression_type().unwrap(), CompressionType::Lz4);
    }

    /// Compression bits and layout bits can be separated with the masks.
    #[test]
    fn test_archive_flags() {
        let flags = 2 | ArchiveFlags::BLOCK_INFO_AT_END;

        let masked = flags & ArchiveFlags::COMPRESSION_TYPE_MASK;
        assert_eq!(
            CompressionType::from_flags(masked).unwrap(),
            CompressionType::Lz4
        );
        assert_eq!(
            flags & ArchiveFlags::BLOCK_INFO_AT_END,
            ArchiveFlags::BLOCK_INFO_AT_END
        );
    }

    /// Smoke test: the Brotli decoder must return (with either outcome)
    /// rather than panic when fed bytes that are not real Brotli data.
    #[test]
    fn test_brotli_decompression() {
        // Valid Brotli input cannot easily be produced here without an
        // encoder, so both `Ok` and `Err` are acceptable.
        let test_data = b"Hello, World!";
        let _ = decompress_brotli(test_data);
    }

    /// Every known identifier decodes, including when extra flag bits are set.
    #[test]
    fn test_compression_detection() {
        let expectations = [
            (0, CompressionType::None),
            (1, CompressionType::Lzma),
            (2, CompressionType::Lz4),
            (3, CompressionType::Lz4Hc),
            (4, CompressionType::Lzham),
            // LZ4 + other flags
            (0x42, CompressionType::Lz4),
        ];

        for (flags, expected) in expectations {
            assert_eq!(CompressionType::from_flags(flags).unwrap(), expected);
        }
    }

    /// Invalid GZIP input must be rejected with an error.
    #[test]
    fn test_gzip_decompression() {
        let test_data = b"invalid gzip data";
        assert!(
            decompress_gzip(test_data).is_err(),
            "Should fail with invalid GZIP data"
        );
    }

    /// The support matrix matches expectations for each listed variant.
    #[test]
    fn test_compression_support_matrix() {
        let matrix = [
            (CompressionType::None, true),
            (CompressionType::Lz4, true),
            (CompressionType::Lz4Hc, true),
            (CompressionType::Lzma, true),
            (CompressionType::Lzham, false),
        ];

        for (kind, expected) in matrix {
            assert_eq!(
                kind.is_supported(),
                expected,
                "Expected {} to be {}",
                kind.name(),
                if expected { "supported" } else { "unsupported" }
            );
        }
    }
}