1use crate::error::{BinaryError, Result};
4use flate2::read::GzDecoder;
5use std::io::Read;
6
/// Compression schemes identified by a small numeric id.
///
/// Each variant's explicit discriminant is the id that
/// `CompressionType::from_flags` maps back to that variant.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CompressionType {
    /// Id `0`.
    None = 0,
    /// Id `1`.
    Lzma = 1,
    /// Id `2`.
    Lz4 = 2,
    /// Id `3`.
    Lz4Hc = 3,
    /// Id `4`.
    Lzham = 4,
    /// Id `5`.
    Brotli = 5,
}
23
24impl CompressionType {
25 pub fn from_flags(flags: u32) -> Result<Self> {
27 match flags & 0x3F {
28 0 => Ok(CompressionType::None),
29 1 => Ok(CompressionType::Lzma),
30 2 => Ok(CompressionType::Lz4),
31 3 => Ok(CompressionType::Lz4Hc),
32 4 => Ok(CompressionType::Lzham),
33 5 => Ok(CompressionType::Brotli),
34 other => Err(BinaryError::unsupported_compression(format!(
35 "Unknown compression type: {}",
36 other
37 ))),
38 }
39 }
40
41 pub fn is_supported(self) -> bool {
43 matches!(
44 self,
45 CompressionType::None
46 | CompressionType::Lz4
47 | CompressionType::Lz4Hc
48 | CompressionType::Lzma
49 | CompressionType::Brotli
50 )
51 }
52
53 pub fn name(self) -> &'static str {
55 match self {
56 CompressionType::None => "None",
57 CompressionType::Lzma => "LZMA",
58 CompressionType::Lz4 => "LZ4",
59 CompressionType::Lz4Hc => "LZ4HC",
60 CompressionType::Lzham => "LZHAM",
61 CompressionType::Brotli => "Brotli",
62 }
63 }
64}
65
66pub fn decompress(
68 data: &[u8],
69 compression: CompressionType,
70 uncompressed_size: usize,
71) -> Result<Vec<u8>> {
72 match compression {
73 CompressionType::None => {
74 Ok(data.to_vec())
76 }
77 CompressionType::Lz4 | CompressionType::Lz4Hc => {
78 decompress_lz4(data, uncompressed_size)
80 }
81 CompressionType::Lzma => {
82 decompress_lzma(data, uncompressed_size)
84 }
85 CompressionType::Lzham => {
86 Err(BinaryError::unsupported_compression(
88 "LZHAM compression not yet supported",
89 ))
90 }
91 CompressionType::Brotli => {
92 decompress_brotli(data)
94 }
95 }
96}
97
98fn decompress_lz4(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
100 let buffer_size = uncompressed_size + 128; match lz4_flex::decompress(data, buffer_size) {
108 Ok(decompressed) => {
109 let size_diff = if decompressed.len() > uncompressed_size {
111 decompressed.len() - uncompressed_size
112 } else {
113 uncompressed_size - decompressed.len()
114 };
115
116 if size_diff <= 128 {
117 if decompressed.len() != uncompressed_size {
119 println!(
120 "DEBUG: LZ4 size mismatch (within tolerance): expected {}, got {} (diff: {})",
121 uncompressed_size,
122 decompressed.len(),
123 size_diff
124 );
125 }
126 Ok(decompressed)
127 } else {
128 Err(BinaryError::decompression_failed(format!(
129 "LZ4 decompression size mismatch: expected {}, got {} (diff: {})",
130 uncompressed_size,
131 decompressed.len(),
132 size_diff
133 )))
134 }
135 }
136 Err(e) => {
137 match lz4_flex::decompress(data, uncompressed_size) {
139 Ok(decompressed) => {
140 println!(
141 "DEBUG: LZ4 decompression succeeded with exact size: {} bytes",
142 decompressed.len()
143 );
144 Ok(decompressed)
145 }
146 Err(_) => Err(BinaryError::decompression_failed(format!(
147 "LZ4 block decompression failed: {}",
148 e
149 ))),
150 }
151 }
152 }
153}
154
155fn decompress_lzma(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
157 if data.is_empty() {
159 return Err(BinaryError::invalid_data("LZMA data is empty".to_string()));
160 }
161
162 println!(
163 "DEBUG: LZMA decompression - input size: {}, expected output: {}",
164 data.len(),
165 uncompressed_size
166 );
167
168 let preview_len = 32.min(data.len());
170 let preview: Vec<String> = data[..preview_len]
171 .iter()
172 .map(|b| format!("{:02X}", b))
173 .collect();
174 println!(
175 "DEBUG: LZMA data first {} bytes: {}",
176 preview_len,
177 preview.join(" ")
178 );
179
180 let result = try_unity_lzma_strategies(data, uncompressed_size);
188 if result.is_ok() {
189 return result;
190 }
191
192 #[cfg(feature = "xz2")]
194 {
195 if let Ok(result) = try_xz2_lzma(data, uncompressed_size) {
196 return Ok(result);
197 }
198 }
199
200 Err(BinaryError::decompression_failed(format!(
201 "LZMA decompression failed with all strategies. Input size: {}, expected output: {}",
202 data.len(),
203 uncompressed_size
204 )))
205}
206
207fn try_unity_lzma_strategies(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
209 if let Ok(result) = try_unity_lzma_with_header(data, uncompressed_size) {
211 return Ok(result);
212 }
213
214 if let Ok(result) = try_unity_raw_lzma(data, uncompressed_size) {
216 return Ok(result);
217 }
218
219 let strategies = [
221 ("direct", data),
222 (
223 "skip_13_header",
224 if data.len() > 13 { &data[13..] } else { data },
225 ),
226 (
227 "skip_5_header",
228 if data.len() > 5 { &data[5..] } else { data },
229 ),
230 (
231 "skip_8_header",
232 if data.len() > 8 { &data[8..] } else { data },
233 ),
234 (
235 "unity_custom",
236 if data.len() > 9 { &data[9..] } else { data },
237 ),
238 ];
239
240 for (strategy_name, test_data) in &strategies {
241 if test_data.is_empty() {
242 continue;
243 }
244
245 println!(
246 "DEBUG: Trying LZMA strategy: {}, data size: {}",
247 strategy_name,
248 test_data.len()
249 );
250
251 let mut output = Vec::new();
252 match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(test_data), &mut output) {
253 Ok(_) => {
254 println!(
255 "DEBUG: LZMA strategy '{}' succeeded, output size: {}",
256 strategy_name,
257 output.len()
258 );
259
260 let size_ratio = output.len() as f64 / uncompressed_size as f64;
262 if (0.8..=1.2).contains(&size_ratio) {
263 return Ok(output);
265 } else if output.len() == uncompressed_size {
266 return Ok(output);
268 }
269 }
270 Err(_e) => {
271 }
273 }
274 }
275
276 Err(BinaryError::decompression_failed(
277 "All Unity LZMA strategies failed".to_string(),
278 ))
279}
280
281fn try_unity_lzma_with_header(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
283 if data.len() < 13 {
284 return Err(BinaryError::invalid_data(
285 "LZMA data too short for header".to_string(),
286 ));
287 }
288
289 let props = data[0];
297 let dict_size = u32::from_le_bytes([data[1], data[2], data[3], data[4]]);
298
299 let _lc = props % 9;
301 let remainder = props / 9;
302 let _pb = remainder / 5;
303 let _lp = remainder % 5;
304
305 let offsets_to_try = [5, 13]; for &data_offset in &offsets_to_try {
309 if data_offset >= data.len() {
310 continue;
311 }
312
313 let compressed_data = &data[data_offset..];
314 println!(
315 "DEBUG: Trying Unity LZMA with offset {}, compressed size: {}",
316 data_offset,
317 compressed_data.len()
318 );
319
320 #[cfg(feature = "xz2")]
322 {
323 match try_unity_lzma_with_xz2(props, dict_size, compressed_data, expected_size) {
324 Ok(result) => {
325 println!(
326 "DEBUG: Unity LZMA with xz2 succeeded, output size: {}",
327 result.len()
328 );
329 if result.len() == expected_size {
330 return Ok(result);
331 }
332 }
333 Err(_e) => {
334 }
336 }
337 }
338
339 let lc = props % 9;
341 let remainder = props / 9;
342 let pb = remainder / 5;
343 let lp = remainder % 5;
344
345 println!(
346 "DEBUG: UnityPy LZMA params - lc: {}, pb: {}, lp: {}",
347 lc, pb, lp
348 );
349
350 let mut unity_lzma_data = Vec::new();
352 unity_lzma_data.push(props);
353 unity_lzma_data.extend_from_slice(&dict_size.to_le_bytes());
354 unity_lzma_data.extend_from_slice(&(expected_size as u64).to_le_bytes());
355 unity_lzma_data.extend_from_slice(compressed_data);
356
357 let mut output = Vec::new();
358 match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(&unity_lzma_data), &mut output) {
359 Ok(_) => {
360 println!(
361 "DEBUG: Unity LZMA with UnityPy params succeeded, output size: {}",
362 output.len()
363 );
364 if output.len() == expected_size {
365 return Ok(output);
366 } else if !output.is_empty() {
367 let ratio = output.len() as f64 / expected_size as f64;
368 if (0.8..=1.2).contains(&ratio) {
369 return Ok(output);
370 }
371 }
372 }
373 Err(_e) => {
374 }
376 }
377
378 let mut lzma_data = Vec::new();
380 lzma_data.push(props);
381 lzma_data.extend_from_slice(&dict_size.to_le_bytes());
382 lzma_data.extend_from_slice(&(expected_size as u64).to_le_bytes());
383 lzma_data.extend_from_slice(compressed_data);
384
385 let mut output = Vec::new();
386 match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(&lzma_data), &mut output) {
387 Ok(_) => {
388 println!(
389 "DEBUG: Unity LZMA with lzma_rs succeeded, output size: {}",
390 output.len()
391 );
392 if output.len() == expected_size {
393 return Ok(output);
394 } else if !output.is_empty() {
395 let ratio = output.len() as f64 / expected_size as f64;
396 if (0.8..=1.2).contains(&ratio) {
397 return Ok(output);
398 }
399 }
400 }
401 Err(_e) => {
402 }
404 }
405 }
406
407 Err(BinaryError::decompression_failed(
408 "Unity LZMA header parsing failed".to_string(),
409 ))
410}
411
/// Placeholder for an `xz2`-backed decoder.
///
/// All arguments are currently ignored.
///
/// # Errors
///
/// Always returns a decompression error stating that the path is not
/// implemented.
#[cfg(feature = "xz2")]
fn try_unity_lzma_with_xz2(
    _props: u8,
    _dict_size: u32,
    _compressed_data: &[u8],
    _expected_size: usize,
) -> Result<Vec<u8>> {
    let message = "XZ2 LZMA not yet implemented".to_string();
    Err(BinaryError::decompression_failed(message))
}
426
427fn try_unity_raw_lzma(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
429 if data.len() < 13 {
430 return Err(BinaryError::invalid_data(
431 "Data too short for Unity LZMA".to_string(),
432 ));
433 }
434
435 let offsets_to_try = [0, 5, 8, 9, 13, 16];
438
439 for &offset in &offsets_to_try {
440 if offset >= data.len() {
441 continue;
442 }
443
444 let lzma_stream = &data[offset..];
445 if lzma_stream.len() < 5 {
446 continue;
447 }
448
449 println!(
450 "DEBUG: Trying LZMA stream from offset {}, size: {}",
451 offset,
452 lzma_stream.len()
453 );
454
455 let mut output = Vec::new();
457 match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(lzma_stream), &mut output) {
458 Ok(_) => {
459 println!(
460 "DEBUG: Raw LZMA from offset {} succeeded, output size: {}",
461 offset,
462 output.len()
463 );
464
465 if output.len() == expected_size {
467 return Ok(output);
468 } else if !output.is_empty() {
469 let ratio = output.len() as f64 / expected_size as f64;
470 if (0.5..=2.0).contains(&ratio) {
471 println!(
472 "DEBUG: Size ratio {:.2} is acceptable for offset {}",
473 ratio, offset
474 );
475 return Ok(output);
476 }
477 }
478 }
479 Err(_e) => {
480 }
482 }
483
484 if lzma_stream.len() >= 5 {
486 let mut reconstructed = Vec::new();
487 reconstructed.extend_from_slice(&lzma_stream[0..5]); reconstructed.extend_from_slice(&(expected_size as u64).to_le_bytes()); if lzma_stream.len() > 5 {
490 reconstructed.extend_from_slice(&lzma_stream[5..]); }
492
493 let mut output = Vec::new();
494 match lzma_rs::lzma_decompress(&mut std::io::Cursor::new(&reconstructed), &mut output) {
495 Ok(_) => {
496 println!(
497 "DEBUG: Reconstructed LZMA from offset {} succeeded, output size: {}",
498 offset,
499 output.len()
500 );
501 if output.len() == expected_size {
502 return Ok(output);
503 }
504 }
505 Err(e) => {
506 println!(
507 "DEBUG: Reconstructed LZMA from offset {} failed: {}",
508 offset, e
509 );
510 }
511 }
512 }
513 }
514
515 Err(BinaryError::decompression_failed(
516 "Unity raw LZMA failed".to_string(),
517 ))
518}
519
/// Tries to decode `data` with `xz2::read::XzDecoder`, skipping 0, 13 and
/// then 5 leading bytes.
///
/// A skip is applied only when `data` is longer than the skip count;
/// otherwise the whole input is used. Output is accepted when its length is
/// within 80%–120% of `uncompressed_size` or equals it exactly.
///
/// # Errors
///
/// Returns a decompression error when no attempt yields acceptable output.
#[cfg(feature = "xz2")]
fn try_xz2_lzma(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
    use std::io::Read;

    // Leading byte counts to skip, in the order they are tried.
    for &skip in [0usize, 13, 5].iter() {
        let test_data = if data.len() > skip { &data[skip..] } else { data };
        if test_data.is_empty() {
            continue;
        }

        let mut decoder = xz2::read::XzDecoder::new(std::io::Cursor::new(test_data));
        let mut output = Vec::new();
        if decoder.read_to_end(&mut output).is_err() {
            continue;
        }

        let size_ratio = output.len() as f64 / uncompressed_size as f64;
        if (0.8..=1.2).contains(&size_ratio) || output.len() == uncompressed_size {
            return Ok(output);
        }
    }

    Err(BinaryError::decompression_failed(
        "XZ2 LZMA decompression failed".to_string(),
    ))
}
559
560pub fn decompress_brotli(data: &[u8]) -> Result<Vec<u8>> {
562 use std::io::Read;
563 let mut decompressed = Vec::new();
564 let mut decoder = brotli::Decompressor::new(data, 4096); match decoder.read_to_end(&mut decompressed) {
566 Ok(_) => Ok(decompressed),
567 Err(e) => Err(BinaryError::decompression_failed(format!(
568 "Brotli decompression failed: {}",
569 e
570 ))),
571 }
572}
573
574pub fn decompress_gzip(data: &[u8]) -> Result<Vec<u8>> {
576 let mut decoder = GzDecoder::new(data);
577 let mut decompressed = Vec::new();
578 decoder.read_to_end(&mut decompressed).map_err(|e| {
579 BinaryError::decompression_failed(format!("GZIP decompression failed: {}", e))
580 })?;
581 Ok(decompressed)
582}
583
/// Size and flag information for one block of data.
#[derive(Clone, Debug)]
pub struct CompressionBlock {
    /// Expected size of the block after decompression, in bytes.
    pub uncompressed_size: u32,
    /// Size of the block as stored, in bytes.
    pub compressed_size: u32,
    /// Block flags; `CompressionType::from_flags` reads the compression type
    /// from the low six bits.
    pub flags: u16,
}
594
595impl CompressionBlock {
596 pub fn new(uncompressed_size: u32, compressed_size: u32, flags: u16) -> Self {
598 Self {
599 uncompressed_size,
600 compressed_size,
601 flags,
602 }
603 }
604
605 pub fn compression_type(&self) -> Result<CompressionType> {
607 CompressionType::from_flags(self.flags as u32)
608 }
609
610 pub fn is_compressed(&self) -> bool {
612 self.uncompressed_size != self.compressed_size
613 }
614
615 pub fn decompress(&self, data: &[u8]) -> Result<Vec<u8>> {
617 if data.len() != self.compressed_size as usize {
618 return Err(BinaryError::invalid_data(format!(
619 "Block data size mismatch: expected {}, got {}",
620 self.compressed_size,
621 data.len()
622 )));
623 }
624
625 let compression = self.compression_type()?;
626 decompress(data, compression, self.uncompressed_size as usize)
627 }
628}
629
/// Namespace for archive flag bit constants.
pub struct ArchiveFlags;

impl ArchiveFlags {
    /// Mask covering the low six bits (`0x3F`).
    pub const COMPRESSION_TYPE_MASK: u32 = (1 << 6) - 1;
    /// Bit 6 (`0x40`).
    pub const BLOCK_INFO_AT_END: u32 = 1 << 6;
    /// Bit 7 (`0x80`).
    pub const OLD_WEB_PLUGIN_COMPATIBILITY: u32 = 1 << 7;
    /// Bit 8 (`0x100`).
    pub const BLOCK_INFO_NEEDS_PADDING_AT_START: u32 = 1 << 8;
}
643
#[cfg(test)]
mod tests {
    use super::*;

    /// Every known id decodes to its variant; an unknown id is rejected.
    #[test]
    fn test_compression_type_from_flags() {
        let known = [
            (0, CompressionType::None),
            (1, CompressionType::Lzma),
            (2, CompressionType::Lz4),
            (3, CompressionType::Lz4Hc),
            (4, CompressionType::Lzham),
            (5, CompressionType::Brotli),
        ];
        for (flags, expected) in known.iter().copied() {
            assert_eq!(CompressionType::from_flags(flags).unwrap(), expected);
        }

        assert!(CompressionType::from_flags(6).is_err());
    }

    /// Display names are fixed strings.
    #[test]
    fn test_compression_type_names() {
        assert_eq!(CompressionType::None.name(), "None");
        assert_eq!(CompressionType::Lzma.name(), "LZMA");
        assert_eq!(CompressionType::Lz4.name(), "LZ4");
        assert_eq!(CompressionType::Lz4Hc.name(), "LZ4HC");
        assert_eq!(CompressionType::Lzham.name(), "LZHAM");
        assert_eq!(CompressionType::Brotli.name(), "Brotli");
    }

    #[test]
    fn test_compression_type_supported() {
        assert!(CompressionType::None.is_supported());
        assert!(CompressionType::Lz4.is_supported());
        assert!(CompressionType::Lz4Hc.is_supported());
        assert!(CompressionType::Lzma.is_supported());
        assert!(CompressionType::Brotli.is_supported());
        assert!(!CompressionType::Lzham.is_supported());
    }

    #[test]
    fn test_no_compression() {
        let data = b"Hello, World!";
        let result = decompress(data, CompressionType::None, data.len()).unwrap();
        assert_eq!(result, data);
    }

    #[test]
    fn test_compression_block() {
        let block = CompressionBlock::new(100, 80, 2);
        assert!(block.is_compressed());
        assert_eq!(block.compression_type().unwrap(), CompressionType::Lz4);

        // Input whose length differs from `compressed_size` is rejected
        // before any decoding is attempted.
        assert!(block.decompress(b"short").is_err());

        // Equal sizes are reported as not compressed, and an uncompressed
        // block round-trips its data.
        let data = b"Hello, World!";
        let stored = CompressionBlock::new(data.len() as u32, data.len() as u32, 0);
        assert!(!stored.is_compressed());
        assert_eq!(stored.decompress(data).unwrap(), data);
    }

    #[test]
    fn test_archive_flags() {
        let flags = 2 | ArchiveFlags::BLOCK_INFO_AT_END;
        let compression =
            CompressionType::from_flags(flags & ArchiveFlags::COMPRESSION_TYPE_MASK).unwrap();
        assert_eq!(compression, CompressionType::Lz4);
        assert_eq!(
            flags & ArchiveFlags::BLOCK_INFO_AT_END,
            ArchiveFlags::BLOCK_INFO_AT_END
        );
    }

    #[test]
    fn test_brotli_decompression() {
        // 0x06 is the one-byte Brotli stream that encodes empty output.
        assert_eq!(decompress_brotli(&[0x06]).unwrap(), Vec::<u8>::new());

        // Arbitrary text is not guaranteed to be rejected in any particular
        // way; the only requirement is that decoding it does not panic.
        let _ = decompress_brotli(b"Hello, World!");
    }

    /// Bits above the low six are ignored when detecting the compression type.
    #[test]
    fn test_compression_detection() {
        assert_eq!(
            CompressionType::from_flags(0x42).unwrap(),
            CompressionType::Lz4
        );
        assert_eq!(
            CompressionType::from_flags(0x100 | 1).unwrap(),
            CompressionType::Lzma
        );
        assert_eq!(
            CompressionType::from_flags(0x80 | 4).unwrap(),
            CompressionType::Lzham
        );
        assert_eq!(
            CompressionType::from_flags(0xFFC0).unwrap(),
            CompressionType::None
        );

        // All six low bits set is not a known id.
        assert!(CompressionType::from_flags(0x3F).is_err());
    }

    #[test]
    fn test_gzip_decompression() {
        assert!(
            decompress_gzip(b"invalid gzip data").is_err(),
            "Should fail with invalid GZIP data"
        );
    }

    #[test]
    fn test_compression_support_matrix() {
        let supported_types = [
            CompressionType::None,
            CompressionType::Lz4,
            CompressionType::Lz4Hc,
            CompressionType::Lzma,
            CompressionType::Brotli,
        ];

        let unsupported_types = [CompressionType::Lzham];

        for compression_type in supported_types.iter().copied() {
            assert!(
                compression_type.is_supported(),
                "Expected {} to be supported",
                compression_type.name()
            );
        }

        for compression_type in unsupported_types.iter().copied() {
            assert!(
                !compression_type.is_supported(),
                "Expected {} to be unsupported",
                compression_type.name()
            );
        }
    }
}