use crate::LiteralsBlockType;
use crate::huffman::{HuffmanBitReader, HuffmanTable, read_huffman_table};
use oxiarc_core::error::{OxiArcError, Result};
/// Parsed form of a literals-section header, as produced by
/// `parse_literals_header`.
#[derive(Debug)]
pub struct LiteralsHeader {
/// Block type taken from the two lowest bits of the first header byte.
pub block_type: LiteralsBlockType,
/// Number of literal bytes the section expands to.
pub regenerated_size: usize,
/// Payload size in bytes following the header: `1` for RLE blocks,
/// `regenerated_size` for raw blocks, and the size field read from the
/// header for compressed/treeless blocks.
pub compressed_size: usize,
/// Number of Huffman streams: `1` or `4` for compressed/treeless blocks,
/// always `1` for raw/RLE blocks.
pub num_streams: usize,
/// Length of the header itself in bytes (between 1 and 5).
pub header_size: usize,
}
/// Parses the literals-section header found at the start of `data`.
///
/// The first byte carries the block type in bits 0-1 and the size format in
/// bits 2-3. The remaining header bits, continued little-endian into the
/// following bytes, hold the size field(s):
///
/// * Raw / RLE blocks: a single regenerated size of 5, 12 or 20 bits
///   (header length 1, 2 or 3 bytes).
/// * Compressed / treeless blocks: a regenerated size followed by a
///   compressed size, each 10, 14 or 18 bits wide (header length 3, 4 or 5
///   bytes). Size format 0 selects a single stream, every other format four.
///
/// # Errors
///
/// Returns `OxiArcError::CorruptedData` when `data` is empty or shorter than
/// the header length implied by the size format.
pub fn parse_literals_header(data: &[u8]) -> Result<LiteralsHeader> {
    let corrupted = |message: &str| OxiArcError::CorruptedData {
        offset: 0,
        message: message.to_string(),
    };

    let Some(&first) = data.first() else {
        return Err(corrupted("empty literals section"));
    };
    let block_type = LiteralsBlockType::from_bits(first & 0x03);
    let size_format = (first >> 2) & 0x03;

    match block_type {
        LiteralsBlockType::Raw | LiteralsBlockType::Rle => {
            // Formats 0 and 2 both denote the one-byte header; `size_format`
            // is masked to two bits, so the wildcard only ever sees 0 or 2.
            let header_size: usize = match size_format {
                1 => 2,
                3 => 3,
                _ => 1,
            };
            if data.len() < header_size {
                return Err(corrupted("truncated literals header"));
            }

            let regenerated_size = if header_size == 1 {
                // One-byte form: only a single format bit is used, leaving 5 size bits.
                usize::from(first >> 3)
            } else {
                // Multi-byte form: 4 bits from the first byte, then whole bytes.
                data[1..header_size]
                    .iter()
                    .enumerate()
                    .fold(usize::from(first >> 4), |acc, (index, &byte)| {
                        acc | (usize::from(byte) << (4 + 8 * index))
                    })
            };

            let compressed_size = if block_type == LiteralsBlockType::Rle {
                1
            } else {
                regenerated_size
            };

            Ok(LiteralsHeader {
                block_type,
                regenerated_size,
                compressed_size,
                num_streams: 1,
                header_size,
            })
        }
        LiteralsBlockType::Compressed | LiteralsBlockType::Treeless => {
            // (header length, width of each size field in bits, stream count)
            let (header_size, field_bits, num_streams): (usize, u32, usize) = match size_format {
                0 => (3, 10, 1),
                1 => (3, 10, 4),
                2 => (4, 14, 4),
                _ => (5, 18, 4),
            };
            if data.len() < header_size {
                return Err(corrupted("truncated compressed literals header"));
            }

            // Gather every header bit above the type/format nibble, little-endian.
            let packed = data[1..header_size]
                .iter()
                .enumerate()
                .fold(u64::from(first >> 4), |acc, (index, &byte)| {
                    acc | (u64::from(byte) << (4 + 8 * index))
                });
            let field_mask = (1u64 << field_bits) - 1;
            let regenerated_size = (packed & field_mask) as usize;
            let compressed_size = ((packed >> field_bits) & field_mask) as usize;

            Ok(LiteralsHeader {
                block_type,
                regenerated_size,
                compressed_size,
                num_streams,
                header_size,
            })
        }
    }
}
/// Stateful decoder for literals sections.
///
/// It keeps the Huffman table read from the most recent compressed block so
/// that a following treeless block can reuse it.
pub struct LiteralsDecoder {
// `None` until a compressed block has been decoded; treeless blocks are
// rejected while it is `None`.
huffman_table: Option<HuffmanTable>,
}
impl LiteralsDecoder {
pub fn new() -> Self {
Self {
huffman_table: None,
}
}
pub fn decode(&mut self, data: &[u8]) -> Result<(Vec<u8>, usize)> {
let header = parse_literals_header(data)?;
let content = &data[header.header_size..];
match header.block_type {
LiteralsBlockType::Raw => {
if content.len() < header.regenerated_size {
return Err(OxiArcError::CorruptedData {
offset: 0,
message: "truncated raw literals".to_string(),
});
}
let literals = content[..header.regenerated_size].to_vec();
Ok((literals, header.header_size + header.regenerated_size))
}
LiteralsBlockType::Rle => {
if content.is_empty() {
return Err(OxiArcError::CorruptedData {
offset: 0,
message: "missing RLE byte".to_string(),
});
}
let literals = vec![content[0]; header.regenerated_size];
Ok((literals, header.header_size + 1))
}
LiteralsBlockType::Compressed => {
if content.len() < header.compressed_size {
return Err(OxiArcError::CorruptedData {
offset: 0,
message: "truncated compressed literals".to_string(),
});
}
let (table, table_size) = read_huffman_table(content)?;
self.huffman_table = Some(table);
let stream_data = &content[table_size..header.compressed_size];
let literals = self.decode_huffman_streams(
stream_data,
header.regenerated_size,
header.num_streams,
)?;
Ok((literals, header.header_size + header.compressed_size))
}
LiteralsBlockType::Treeless => {
if self.huffman_table.is_none() {
return Err(OxiArcError::CorruptedData {
offset: 0,
message: "treeless literals without previous table".to_string(),
});
}
if content.len() < header.compressed_size {
return Err(OxiArcError::CorruptedData {
offset: 0,
message: "truncated treeless literals".to_string(),
});
}
let stream_data = &content[..header.compressed_size];
let literals = self.decode_huffman_streams(
stream_data,
header.regenerated_size,
header.num_streams,
)?;
Ok((literals, header.header_size + header.compressed_size))
}
}
}
fn decode_huffman_streams(
&self,
data: &[u8],
total_size: usize,
num_streams: usize,
) -> Result<Vec<u8>> {
let table = self
.huffman_table
.as_ref()
.ok_or_else(|| OxiArcError::CorruptedData {
offset: 0,
message: "no Huffman table".to_string(),
})?;
if num_streams == 1 {
self.decode_single_stream(data, total_size, table)
} else {
self.decode_four_streams(data, total_size, table)
}
}
fn decode_single_stream(
&self,
data: &[u8],
size: usize,
table: &HuffmanTable,
) -> Result<Vec<u8>> {
let mut reader = HuffmanBitReader::new(data)?;
let mut output = Vec::with_capacity(size);
while output.len() < size {
let bits = reader.peek_bits(table.max_bits());
let entry = table.decode(bits);
output.push(entry.symbol);
reader.consume(entry.num_bits);
}
Ok(output)
}
fn decode_four_streams(
&self,
data: &[u8],
total_size: usize,
table: &HuffmanTable,
) -> Result<Vec<u8>> {
if data.len() < 6 {
return Err(OxiArcError::CorruptedData {
offset: 0,
message: "truncated 4-stream jump table".to_string(),
});
}
let jump1 = u16::from_le_bytes([data[0], data[1]]) as usize;
let jump2 = u16::from_le_bytes([data[2], data[3]]) as usize;
let jump3 = u16::from_le_bytes([data[4], data[5]]) as usize;
let stream_data = &data[6..];
if jump1 > stream_data.len() || jump2 > stream_data.len() || jump3 > stream_data.len() {
return Err(OxiArcError::CorruptedData {
offset: 0,
message: "invalid stream jump offsets".to_string(),
});
}
let stream1 = &stream_data[..jump1];
let stream2 = &stream_data[jump1..jump2];
let stream3 = &stream_data[jump2..jump3];
let stream4 = &stream_data[jump3..];
let quarter = total_size.div_ceil(4);
let size1 = quarter;
let size2 = quarter;
let size3 = quarter;
let size4 = total_size - size1 - size2 - size3;
let mut output = Vec::with_capacity(total_size);
output.extend(self.decode_single_stream(stream1, size1, table)?);
output.extend(self.decode_single_stream(stream2, size2, table)?);
output.extend(self.decode_single_stream(stream3, size3, table)?);
output.extend(self.decode_single_stream(stream4, size4, table)?);
Ok(output)
}
}
impl Default for LiteralsDecoder {
    /// Returns a decoder with no Huffman table loaded, the same state
    /// `LiteralsDecoder::new` produces.
    fn default() -> Self {
        Self {
            huffman_table: None,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A one-byte raw header: type bits `00`, size bits encoding 1.
    #[test]
    fn test_parse_raw_literals_small() {
        let input = [0b00001000];
        let parsed = parse_literals_header(&input).unwrap();

        assert_eq!(parsed.block_type, LiteralsBlockType::Raw);
        assert_eq!(parsed.regenerated_size, 1);
        assert_eq!(parsed.header_size, 1);
    }

    /// A one-byte RLE header: type bits `01`, size bits encoding 1.
    #[test]
    fn test_parse_rle_literals() {
        let input = [0b00001001];
        let parsed = parse_literals_header(&input).unwrap();

        assert_eq!(parsed.block_type, LiteralsBlockType::Rle);
        assert_eq!(parsed.regenerated_size, 1);
        assert_eq!(parsed.compressed_size, 1);
    }

    /// A raw block of five bytes is copied through verbatim.
    #[test]
    fn test_decode_raw_literals() {
        // Header byte: raw block, regenerated size 5.
        let mut input = vec![0b00101000];
        input.extend_from_slice(b"Hello");

        let (literals, consumed) = LiteralsDecoder::new().decode(&input).unwrap();

        assert_eq!(literals, b"Hello");
        assert_eq!(consumed, 6);
    }

    /// An RLE block repeats its single payload byte `regenerated_size` times.
    #[test]
    fn test_decode_rle_literals() {
        // Header byte: RLE block, regenerated size 5; payload byte 'A'.
        let input = [0b00101001, b'A'];

        let (literals, consumed) = LiteralsDecoder::new().decode(&input).unwrap();

        assert_eq!(literals, vec![b'A'; 5]);
        assert_eq!(consumed, 2);
    }
}