use crate::error::{BinaryError, Result};
use flate2::read::GzDecoder;
use std::io::Read;
/// Compression scheme identifiers, each pinned to an explicit numeric discriminant.
///
/// The discriminants (0 through 5) are the same numbers that
/// `CompressionType::from_flags` recognizes after masking a flags word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionType {
    /// Payload is stored as-is; `decompress` returns a copy of the input.
    None = 0,
    /// LZMA stream.
    Lzma = 1,
    /// LZ4 block; handled by the same decoder as [`CompressionType::Lz4Hc`].
    Lz4 = 2,
    /// LZ4HC block; handled by the same decoder as [`CompressionType::Lz4`].
    Lz4Hc = 3,
    /// LZHAM; recognized as an identifier but rejected by `decompress`.
    Lzham = 4,
    /// Brotli stream.
    Brotli = 5,
}
impl CompressionType {
    /// Decodes the compression type stored in the low six bits of `flags`.
    ///
    /// Only `flags & 0x3F` is inspected; any higher bits are ignored.
    ///
    /// # Errors
    ///
    /// Returns `BinaryError::unsupported_compression` when the masked value
    /// is not one of the known identifiers `0..=5`.
    pub fn from_flags(flags: u32) -> Result<Self> {
        let kind = match flags & 0x3F {
            0 => Self::None,
            1 => Self::Lzma,
            2 => Self::Lz4,
            3 => Self::Lz4Hc,
            4 => Self::Lzham,
            5 => Self::Brotli,
            other => {
                let message = format!("Unknown compression type: {}", other);
                return Err(BinaryError::unsupported_compression(message));
            }
        };
        Ok(kind)
    }

    /// Reports whether this module can decode the scheme.
    ///
    /// Every variant except `Lzham` is reported as supported.
    pub fn is_supported(self) -> bool {
        match self {
            Self::Lzham => false,
            Self::None | Self::Lzma | Self::Lz4 | Self::Lz4Hc | Self::Brotli => true,
        }
    }

    /// Returns a fixed, human-readable label for the scheme.
    pub fn name(self) -> &'static str {
        match self {
            Self::None => "None",
            Self::Lzma => "LZMA",
            Self::Lz4 => "LZ4",
            Self::Lz4Hc => "LZ4HC",
            Self::Lzham => "LZHAM",
            Self::Brotli => "Brotli",
        }
    }
}
/// Decompresses `data` with the decoder selected by `compression`.
///
/// `uncompressed_size` is forwarded to the LZ4 and LZMA decoders as the
/// expected output length. The `None` path (which returns a copy of `data`)
/// and the Brotli path do not consult it.
///
/// # Errors
///
/// `CompressionType::Lzham` always yields
/// `BinaryError::unsupported_compression`; every other error is whatever the
/// selected decoder reports.
pub fn decompress(
    data: &[u8],
    compression: CompressionType,
    uncompressed_size: usize,
) -> Result<Vec<u8>> {
    match compression {
        CompressionType::None => Ok(data.to_vec()),
        CompressionType::Lzma => decompress_lzma(data, uncompressed_size),
        CompressionType::Lz4 | CompressionType::Lz4Hc => decompress_lz4(data, uncompressed_size),
        CompressionType::Brotli => decompress_brotli(data),
        CompressionType::Lzham => Err(BinaryError::unsupported_compression(
            "LZHAM compression not yet supported",
        )),
    }
}
/// Decodes an LZ4 block whose output is expected to be about
/// `uncompressed_size` bytes long.
///
/// The first attempt hands the decoder `uncompressed_size + 128` as its size
/// argument and accepts the result when its length is within 128 bytes of
/// `uncompressed_size` in either direction. If that attempt errors, the
/// block is decoded once more with exactly `uncompressed_size`; the result of
/// the retry is returned without a length check.
///
/// # Errors
///
/// * `BinaryError::invalid_data` if `uncompressed_size + 128` overflows.
/// * `BinaryError::decompression_failed` if the decoded length is off by more
///   than 128 bytes, or if both attempts fail (the message carries the error
///   from the first attempt).
fn decompress_lz4(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
    // Slack, in bytes, used both for the padded size and for the length check.
    const SIZE_TOLERANCE: usize = 128;

    let padded_size = match uncompressed_size.checked_add(SIZE_TOLERANCE) {
        Some(total) => total,
        None => return Err(BinaryError::invalid_data("LZ4 uncompressed_size overflow")),
    };

    let first_error = match lz4_flex::decompress(data, padded_size) {
        Ok(decompressed) => {
            let actual = decompressed.len();
            let size_diff = actual.max(uncompressed_size) - actual.min(uncompressed_size);
            return if size_diff <= SIZE_TOLERANCE {
                Ok(decompressed)
            } else {
                Err(BinaryError::decompression_failed(format!(
                    "LZ4 decompression size mismatch: expected {}, got {} (diff: {})",
                    uncompressed_size, actual, size_diff
                )))
            }
        }
        Err(e) => e,
    };

    // Retry with the exact size; on failure report the original error.
    lz4_flex::decompress(data, uncompressed_size).map_err(|_| {
        BinaryError::decompression_failed(format!(
            "LZ4 block decompression failed: {}",
            first_error
        ))
    })
}
/// Decodes an LZMA payload by delegating to `try_unity_lzma_strategies`.
///
/// # Errors
///
/// * `BinaryError::invalid_data` when `data` is empty.
/// * `BinaryError::decompression_failed` when the strategy chain fails; the
///   underlying error is replaced by a message that records the input length
///   and the expected output length.
fn decompress_lzma(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
    if data.is_empty() {
        return Err(BinaryError::invalid_data("LZMA data is empty".to_string()));
    }

    try_unity_lzma_strategies(data, uncompressed_size).map_err(|_| {
        BinaryError::decompression_failed(format!(
            "LZMA decompression failed with all strategies. Input size: {}, expected output: {}",
            data.len(),
            uncompressed_size
        ))
    })
}
/// Runs the LZMA decoding attempts in order and returns the first accepted
/// output.
///
/// Order of attempts:
/// 1. `try_unity_lzma_with_header`
/// 2. `try_unity_raw_lzma`
/// 3. A plain `lzma_rs::lzma_decompress` on `data` with 0, 13, 5, 8 and 9
///    leading bytes skipped (a skip is only applied when `data` is longer
///    than the skip; otherwise the whole input is used).
///
/// In step 3 an output is accepted when its length is between 80% and 120%
/// of `uncompressed_size`, or equals it exactly (which also covers an
/// expected size of zero).
///
/// # Errors
///
/// Returns `BinaryError::decompression_failed` when no attempt is accepted.
fn try_unity_lzma_strategies(data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
    let primary = try_unity_lzma_with_header(data, uncompressed_size)
        .or_else(|_| try_unity_raw_lzma(data, uncompressed_size));
    if let Ok(decoded) = primary {
        return Ok(decoded);
    }

    // (label, number of leading bytes to skip); labels are informational only.
    let skips: [(&str, usize); 5] = [
        ("direct", 0),
        ("skip_13_header", 13),
        ("skip_5_header", 5),
        ("skip_8_header", 8),
        ("unity_custom", 9),
    ];

    for &(_label, skip) in skips.iter() {
        let candidate = if data.len() > skip { &data[skip..] } else { data };
        if candidate.is_empty() {
            continue;
        }

        let mut decoded = Vec::new();
        let outcome = lzma_rs::lzma_decompress(&mut std::io::Cursor::new(candidate), &mut decoded);
        if outcome.is_err() {
            continue;
        }

        let size_ratio = decoded.len() as f64 / uncompressed_size as f64;
        if (0.8..=1.2).contains(&size_ratio) || decoded.len() == uncompressed_size {
            return Ok(decoded);
        }
    }

    Err(BinaryError::decompression_failed(
        "All Unity LZMA strategies failed".to_string(),
    ))
}
/// Decodes LZMA data by rebuilding a 13-byte header in front of the payload.
///
/// The first five bytes of `data` (a properties byte followed by a 4-byte
/// little-endian dictionary size) are kept, `expected_size` is written after
/// them as a little-endian `u64`, and the payload is appended. The payload is
/// taken first from offset 5 (input header without a size field) and then
/// from offset 13 (input header with a size field that is replaced).
///
/// An output is accepted when its length equals `expected_size`, or when it
/// is non-empty and its length is between 80% and 120% of `expected_size`.
///
/// Each offset is decoded exactly once: the rebuilt stream is a pure function
/// of `data`, the offset and `expected_size`, so repeating the attempt with
/// identical bytes cannot change the outcome.
///
/// # Errors
///
/// * `BinaryError::invalid_data` when `data` is shorter than 13 bytes.
/// * `BinaryError::decompression_failed` when neither offset yields an
///   accepted output.
fn try_unity_lzma_with_header(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
    // Length of the properties byte plus the dictionary-size field.
    const PROPS_AND_DICT_LEN: usize = 5;
    // Length of the rebuilt header: properties + dictionary size + u64 size.
    const FULL_HEADER_LEN: usize = 13;

    if data.len() < FULL_HEADER_LEN {
        return Err(BinaryError::invalid_data(
            "LZMA data too short for header".to_string(),
        ));
    }

    let props_and_dict = &data[..PROPS_AND_DICT_LEN];
    let size_field = (expected_size as u64).to_le_bytes();

    for &payload_offset in [PROPS_AND_DICT_LEN, FULL_HEADER_LEN].iter() {
        if payload_offset >= data.len() {
            continue;
        }
        let payload = &data[payload_offset..];

        let mut stream = Vec::with_capacity(FULL_HEADER_LEN + payload.len());
        stream.extend_from_slice(props_and_dict);
        stream.extend_from_slice(&size_field);
        stream.extend_from_slice(payload);

        let mut output = Vec::new();
        let outcome =
            lzma_rs::lzma_decompress(&mut std::io::Cursor::new(stream.as_slice()), &mut output);
        if outcome.is_err() {
            // Best effort: a failed offset just means the next one is tried.
            continue;
        }

        if output.len() == expected_size {
            return Ok(output);
        }
        if !output.is_empty() {
            let ratio = output.len() as f64 / expected_size as f64;
            if (0.8..=1.2).contains(&ratio) {
                return Ok(output);
            }
        }
    }

    Err(BinaryError::decompression_failed(
        "Unity LZMA header parsing failed".to_string(),
    ))
}
/// Probes several start offsets in `data` for a decodable LZMA stream.
///
/// For each offset in `0, 5, 8, 9, 13, 16` that leaves at least five bytes,
/// two decodes are attempted:
///
/// 1. The remaining bytes as they are. The output is accepted when its length
///    equals `expected_size`, or when it is non-empty and its length is
///    between 50% and 200% of `expected_size`.
/// 2. The remaining bytes with `expected_size` (little-endian `u64`) inserted
///    after the first five bytes. The output is accepted only on an exact
///    length match.
///
/// # Errors
///
/// * `BinaryError::invalid_data` when `data` is shorter than 13 bytes.
/// * `BinaryError::decompression_failed` when no offset yields an accepted
///   output.
fn try_unity_raw_lzma(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
    if data.len() < 13 {
        return Err(BinaryError::invalid_data(
            "Data too short for Unity LZMA".to_string(),
        ));
    }

    let size_field = (expected_size as u64).to_le_bytes();

    for &offset in [0usize, 5, 8, 9, 13, 16].iter() {
        // Skip offsets past the end or leaving fewer than five bytes.
        let lzma_stream = match data.get(offset..) {
            Some(tail) if tail.len() >= 5 => tail,
            _ => continue,
        };

        // Attempt 1: decode the bytes exactly as stored.
        let mut direct = Vec::new();
        let direct_ok =
            lzma_rs::lzma_decompress(&mut std::io::Cursor::new(lzma_stream), &mut direct).is_ok();
        if direct_ok {
            if direct.len() == expected_size {
                return Ok(direct);
            }
            if !direct.is_empty() {
                let ratio = direct.len() as f64 / expected_size as f64;
                if (0.5..=2.0).contains(&ratio) {
                    return Ok(direct);
                }
            }
        }

        // Attempt 2: splice the expected size in after the first five bytes.
        let (head, rest) = lzma_stream.split_at(5);
        let reconstructed = [head, &size_field[..], rest].concat();
        let mut rebuilt = Vec::new();
        let rebuilt_ok =
            lzma_rs::lzma_decompress(&mut std::io::Cursor::new(&reconstructed), &mut rebuilt)
                .is_ok();
        if rebuilt_ok && rebuilt.len() == expected_size {
            return Ok(rebuilt);
        }
    }

    Err(BinaryError::decompression_failed(
        "Unity raw LZMA failed".to_string(),
    ))
}
/// Decodes a Brotli stream and returns all of its output.
///
/// The decoder is constructed with `4096` as its second argument
/// (presumably the internal buffer size; confirm against the `brotli` crate).
///
/// # Errors
///
/// Returns `BinaryError::decompression_failed`, carrying the I/O error text,
/// when reading the decoded stream fails.
pub fn decompress_brotli(data: &[u8]) -> Result<Vec<u8>> {
    let mut decompressed = Vec::new();
    let mut decoder = brotli::Decompressor::new(data, 4096);
    decoder.read_to_end(&mut decompressed).map_err(|e| {
        BinaryError::decompression_failed(format!("Brotli decompression failed: {}", e))
    })?;
    Ok(decompressed)
}
/// Decodes a GZIP stream and returns all of its output.
///
/// # Errors
///
/// Returns `BinaryError::decompression_failed`, carrying the I/O error text,
/// when reading the decoded stream fails.
pub fn decompress_gzip(data: &[u8]) -> Result<Vec<u8>> {
    let mut decompressed = Vec::new();
    let mut gz = GzDecoder::new(data);
    let outcome = gz.read_to_end(&mut decompressed);
    match outcome {
        Ok(_) => Ok(decompressed),
        Err(e) => Err(BinaryError::decompression_failed(format!(
            "GZIP decompression failed: {}",
            e
        ))),
    }
}
/// Size and flag metadata for one compressed block.
#[derive(Debug, Clone)]
pub struct CompressionBlock {
    /// Expected length of the block after decompression, in bytes.
    pub uncompressed_size: u32,
    /// Length of the stored block, in bytes; `decompress` requires its input
    /// to be exactly this long.
    pub compressed_size: u32,
    /// Flag bits; the low six bits select the compression type.
    pub flags: u16,
}
impl CompressionBlock {
pub fn new(uncompressed_size: u32, compressed_size: u32, flags: u16) -> Self {
Self {
uncompressed_size,
compressed_size,
flags,
}
}
pub fn compression_type(&self) -> Result<CompressionType> {
CompressionType::from_flags(self.flags as u32)
}
pub fn is_compressed(&self) -> bool {
self.uncompressed_size != self.compressed_size
}
pub fn decompress(&self, data: &[u8]) -> Result<Vec<u8>> {
if data.len() != self.compressed_size as usize {
return Err(BinaryError::invalid_data(format!(
"Block data size mismatch: expected {}, got {}",
self.compressed_size,
data.len()
)));
}
let compression = self.compression_type()?;
decompress(data, compression, self.uncompressed_size as usize)
}
}
/// Namespace for the bit constants used with a 32-bit flags word.
///
/// The type carries no data; it only groups the associated constants.
pub struct ArchiveFlags;

impl ArchiveFlags {
    /// Mask selecting the low six bits (`0x3F`), the same bits that
    /// `CompressionType::from_flags` inspects.
    pub const COMPRESSION_TYPE_MASK: u32 = (1 << 6) - 1;
    /// Bit 6 (`0x40`). NOTE(review): meaning inferred from the name only.
    pub const BLOCKS_AND_DIRECTORY_INFO_COMBINED: u32 = 1 << 6;
    /// Bit 7 (`0x80`). NOTE(review): meaning inferred from the name only.
    pub const BLOCK_INFO_AT_END: u32 = 1 << 7;
    /// Bit 8 (`0x100`). NOTE(review): meaning inferred from the name only.
    pub const OLD_WEB_PLUGIN_COMPATIBILITY: u32 = 1 << 8;
    /// Bit 9 (`0x200`). NOTE(review): meaning inferred from the name only.
    pub const BLOCK_INFO_NEEDS_PADDING_AT_START: u32 = 1 << 9;
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every known identifier, including Brotli (5), decodes to its variant.
    #[test]
    fn test_compression_type_from_flags() {
        let known = [
            (0, CompressionType::None),
            (1, CompressionType::Lzma),
            (2, CompressionType::Lz4),
            (3, CompressionType::Lz4Hc),
            (4, CompressionType::Lzham),
            (5, CompressionType::Brotli),
        ];
        for (raw, expected) in known {
            assert_eq!(CompressionType::from_flags(raw).unwrap(), expected);
        }
    }

    /// Values outside `0..=5` (after masking with `0x3F`) are rejected.
    #[test]
    fn test_compression_type_unknown_flags() {
        assert!(CompressionType::from_flags(6).is_err());
        assert!(CompressionType::from_flags(0x3F).is_err());
        // High bits are masked off, so 0x7F is treated like 0x3F.
        assert!(CompressionType::from_flags(0x7F).is_err());
    }

    #[test]
    fn test_compression_type_names() {
        assert_eq!(CompressionType::None.name(), "None");
        assert_eq!(CompressionType::Lzma.name(), "LZMA");
        assert_eq!(CompressionType::Lz4.name(), "LZ4");
        assert_eq!(CompressionType::Lz4Hc.name(), "LZ4HC");
        assert_eq!(CompressionType::Lzham.name(), "LZHAM");
        assert_eq!(CompressionType::Brotli.name(), "Brotli");
    }

    #[test]
    fn test_compression_type_supported() {
        assert!(CompressionType::None.is_supported());
        assert!(CompressionType::Lz4.is_supported());
        assert!(CompressionType::Lz4Hc.is_supported());
        assert!(CompressionType::Lzma.is_supported());
        assert!(CompressionType::Brotli.is_supported());
        assert!(!CompressionType::Lzham.is_supported());
    }

    #[test]
    fn test_no_compression() {
        let data = b"Hello, World!";
        let result = decompress(data, CompressionType::None, data.len()).unwrap();
        assert_eq!(result, data);
    }

    /// LZHAM is recognized as a type but has no decoder.
    #[test]
    fn test_lzham_is_rejected() {
        assert!(decompress(b"anything", CompressionType::Lzham, 8).is_err());
    }

    #[test]
    fn test_compression_block() {
        let block = CompressionBlock::new(100, 80, 2);
        assert!(block.is_compressed());
        assert_eq!(block.compression_type().unwrap(), CompressionType::Lz4);

        let stored = CompressionBlock::new(13, 13, 0);
        assert!(!stored.is_compressed());
        assert_eq!(stored.compression_type().unwrap(), CompressionType::None);
    }

    /// `CompressionBlock::decompress` validates the input length and then
    /// dispatches on the flags.
    #[test]
    fn test_compression_block_decompress() {
        let data = b"Hello, World!";
        let stored = CompressionBlock::new(13, 13, 0);
        assert_eq!(stored.decompress(data).unwrap(), data);

        // Input length must equal `compressed_size`.
        assert!(stored.decompress(b"short").is_err());
    }

    #[test]
    fn test_archive_flags() {
        let flags = 2 | ArchiveFlags::BLOCK_INFO_AT_END;
        let compression =
            CompressionType::from_flags(flags & ArchiveFlags::COMPRESSION_TYPE_MASK).unwrap();
        assert_eq!(compression, CompressionType::Lz4);
        assert_eq!(
            flags & ArchiveFlags::BLOCK_INFO_AT_END,
            ArchiveFlags::BLOCK_INFO_AT_END
        );
    }

    /// Smoke test: arbitrary bytes must not make the Brotli path panic.
    ///
    /// The outcome is deliberately not asserted, because this test does not
    /// establish whether the decoder rejects this particular input.
    #[test]
    fn test_brotli_decompression() {
        let test_data = b"Hello, World!";
        let _ = decompress_brotli(test_data);
    }

    #[test]
    fn test_compression_detection() {
        assert_eq!(
            CompressionType::from_flags(0).unwrap(),
            CompressionType::None
        );
        assert_eq!(
            CompressionType::from_flags(1).unwrap(),
            CompressionType::Lzma
        );
        assert_eq!(
            CompressionType::from_flags(2).unwrap(),
            CompressionType::Lz4
        );
        assert_eq!(
            CompressionType::from_flags(3).unwrap(),
            CompressionType::Lz4Hc
        );
        assert_eq!(
            CompressionType::from_flags(4).unwrap(),
            CompressionType::Lzham
        );
        assert_eq!(
            CompressionType::from_flags(5).unwrap(),
            CompressionType::Brotli
        );
        // Bits above the six-bit mask do not affect the decoded type.
        assert_eq!(
            CompressionType::from_flags(0x42).unwrap(),
            CompressionType::Lz4
        );
        assert_eq!(
            CompressionType::from_flags(0x45).unwrap(),
            CompressionType::Brotli
        );
    }

    #[test]
    fn test_gzip_decompression() {
        let test_data = b"invalid gzip data";
        assert!(
            decompress_gzip(test_data).is_err(),
            "Should fail with invalid GZIP data"
        );
    }

    #[test]
    fn test_compression_support_matrix() {
        let supported_types = [
            CompressionType::None,
            CompressionType::Lz4,
            CompressionType::Lz4Hc,
            CompressionType::Lzma,
            CompressionType::Brotli,
        ];
        let unsupported_types = [CompressionType::Lzham];
        for compression_type in supported_types {
            assert!(
                compression_type.is_supported(),
                "Expected {} to be supported",
                compression_type.name()
            );
        }
        for compression_type in unsupported_types {
            assert!(
                !compression_type.is_supported(),
                "Expected {} to be unsupported",
                compression_type.name()
            );
        }
    }
}