use crate::error::{Result, TiffError as Error};
/// Upper bound (64 KiB) on the capacity reserved up front for a decoder's
/// output buffer. The caller-supplied `expected_len` is clamped to this value
/// before `Vec::with_capacity` is called; the buffer still grows on demand.
const MAX_INITIAL_RESERVE: usize = 64 * 1024;
/// Upper bound (64 MiB) on the output limit handed to the capped Deflate and
/// Zstandard decompressors, regardless of the `expected_len` a caller passes.
const MAX_CODEC_OUTPUT: usize = 64 * 1024 * 1024;
pub fn unpack_packbits(input: &[u8], expected_len: usize) -> Result<Vec<u8>> {
let reserve = expected_len
.min(input.len().saturating_mul(128).saturating_add(1))
.min(MAX_INITIAL_RESERVE);
let mut out = Vec::with_capacity(reserve);
let mut i = 0;
while i < input.len() && out.len() < expected_len {
let n = input[i] as i8;
i += 1;
if n >= 0 {
let len = n as usize + 1;
if i + len > input.len() {
return Err(Error::invalid("TIFF/PackBits: literal run past EOF"));
}
out.extend_from_slice(&input[i..i + len]);
i += len;
} else if n == -128 {
continue;
} else {
let count = (-(n as i32) + 1) as usize;
if i >= input.len() {
return Err(Error::invalid("TIFF/PackBits: replicate run past EOF"));
}
let b = input[i];
i += 1;
for _ in 0..count {
out.push(b);
if out.len() >= expected_len {
break;
}
}
}
}
Ok(out)
}
/// Control code that resets the decoder's string table and code width; the
/// encoder in this file always emits it as the first code of a stream.
const LZW_CLEAR_CODE: u16 = 256;
/// Control code that marks the end of the compressed data.
const LZW_EOI_CODE: u16 = 257;
/// First code value that is assigned to a multi-byte table entry.
const LZW_FIRST_CODE: u16 = 258;
/// Largest code value the tables can hold (codes are at most 12 bits wide).
const LZW_MAX_CODE: u16 = 4095;
/// Cursor over a byte slice that hands out variable-width codes, most
/// significant bit first.
struct LzwBits<'a> {
    /// Bytes being read.
    src: &'a [u8],
    /// Absolute position of the next unread bit (0 = top bit of `src[0]`).
    bit_pos: usize,
}

impl<'a> LzwBits<'a> {
    /// Creates a reader positioned at the first bit of `src`.
    fn new(src: &'a [u8]) -> Self {
        Self { src, bit_pos: 0 }
    }

    /// Reads the next `n` bits as an unsigned value.
    ///
    /// Returns `None`, leaving the cursor untouched, when fewer than `n` bits
    /// remain. The result is narrowed to `u16`, so callers are expected to ask
    /// for at most 16 bits.
    fn read(&mut self, n: u32) -> Option<u16> {
        let end = self.bit_pos + n as usize;
        if end > self.src.len() * 8 {
            return None;
        }

        let mut acc: u32 = 0;
        let mut cursor = self.bit_pos;
        while cursor < end {
            // Bits still unread in the current byte, counted from its top.
            let room = 8 - (cursor % 8) as u32;
            let take = room.min((end - cursor) as u32);
            // Drop the low bits we are not consuming yet, then mask off the
            // bits that were already consumed by an earlier read.
            let piece = (u32::from(self.src[cursor / 8]) >> (room - take)) & ((1u32 << take) - 1);
            acc = (acc << take) | piece;
            cursor += take as usize;
        }

        self.bit_pos = cursor;
        Some(acc as u16)
    }
}
pub fn unpack_lzw(input: &[u8], expected_len: usize) -> Result<Vec<u8>> {
let mut bits = LzwBits::new(input);
let reserve = expected_len.min(MAX_INITIAL_RESERVE);
let mut out = Vec::with_capacity(reserve);
const NONE: u16 = u16::MAX;
let mut prefix: Vec<u16> = vec![0; (LZW_MAX_CODE as usize) + 1];
let mut suffix: Vec<u8> = vec![0; (LZW_MAX_CODE as usize) + 1];
let mut len: Vec<u32> = vec![0; (LZW_MAX_CODE as usize) + 1];
let init = |prefix: &mut [u16], suffix: &mut [u8], len: &mut [u32]| {
for i in 0..=255u16 {
prefix[i as usize] = NONE;
suffix[i as usize] = i as u8;
len[i as usize] = 1;
}
};
init(&mut prefix, &mut suffix, &mut len);
let mut code_size: u32 = 9;
let mut next_code: u16 = LZW_FIRST_CODE;
let mut prev_code: Option<u16> = None;
let mut scratch: Vec<u8> = Vec::with_capacity(64);
let emit = |code: u16,
out: &mut Vec<u8>,
scratch: &mut Vec<u8>,
prefix: &[u16],
suffix: &[u8]|
-> Option<u8> {
scratch.clear();
let mut c = code;
for _ in 0..=(LZW_MAX_CODE as usize) {
scratch.push(suffix[c as usize]);
let p = prefix[c as usize];
if p == NONE {
for &b in scratch.iter().rev() {
out.push(b);
}
return Some(*scratch.last().unwrap());
}
c = p;
}
None
};
while let Some(code) = bits.read(code_size) {
if code == LZW_EOI_CODE {
break;
}
if code == LZW_CLEAR_CODE {
init(&mut prefix, &mut suffix, &mut len);
code_size = 9;
next_code = LZW_FIRST_CODE;
prev_code = None;
continue;
}
if let Some(prev) = prev_code {
if code < next_code {
let first = emit(code, &mut out, &mut scratch, &prefix, &suffix)
.ok_or_else(|| Error::invalid("TIFF/LZW: prefix chain cycle"))?;
if next_code <= LZW_MAX_CODE {
prefix[next_code as usize] = prev;
suffix[next_code as usize] = first;
len[next_code as usize] = len[prev as usize] + 1;
next_code += 1;
}
} else if code == next_code {
let first = emit(prev, &mut out, &mut scratch, &prefix, &suffix)
.ok_or_else(|| Error::invalid("TIFF/LZW: prefix chain cycle"))?;
out.push(first);
if next_code <= LZW_MAX_CODE {
prefix[next_code as usize] = prev;
suffix[next_code as usize] = first;
len[next_code as usize] = len[prev as usize] + 1;
next_code += 1;
}
} else {
return Err(Error::invalid(format!(
"TIFF/LZW: code {code} > next_code {next_code}"
)));
}
} else {
if code >= LZW_CLEAR_CODE {
return Err(Error::invalid(format!(
"TIFF/LZW: first code after Clear is {code}, must be a leaf (<256)"
)));
}
let _first = emit(code, &mut out, &mut scratch, &prefix, &suffix)
.ok_or_else(|| Error::invalid("TIFF/LZW: prefix chain cycle"))?;
}
prev_code = Some(code);
if code_size < 12 && next_code >= (1u16 << code_size) - 1 {
code_size += 1;
}
if out.len() >= expected_len {
break;
}
}
Ok(out)
}
pub fn unpack_deflate(input: &[u8], expected_len: usize) -> Result<Vec<u8>> {
let limit = expected_len.min(MAX_CODEC_OUTPUT) as u64;
compcol::vec::decompress_to_vec_capped::<compcol::zlib::Zlib>(input, limit)
.map_err(|e| Error::invalid(format!("TIFF/Deflate: zlib inflate failed: {e:?}")))
}
pub fn unpack_zstd(input: &[u8], expected_len: usize) -> Result<Vec<u8>> {
let limit = expected_len.min(MAX_CODEC_OUTPUT) as u64;
compcol::vec::decompress_to_vec_capped::<compcol::zstd::Zstd>(input, limit)
.map_err(|e| Error::invalid(format!("TIFF/ZSTD: Zstandard frame decode failed: {e:?}")))
}
/// Encodes `input` with PackBits.
///
/// Three or more identical consecutive bytes (up to 128) become a replicate
/// packet: header `1 - run` as a signed byte, then the repeated byte.
/// Everything else is gathered into literal packets of up to 128 bytes
/// (header `len - 1`, then the bytes); a literal is cut short as soon as a
/// run of three identical bytes begins, so that run gets its own packet.
pub fn pack_packbits(input: &[u8]) -> Vec<u8> {
    /// Longest run a single packet header can describe.
    const MAX_RUN: usize = 128;

    let mut packed = Vec::new();
    let mut pos = 0;
    while let Some(&head) = input.get(pos) {
        // Length of the run of `head` starting at `pos`, capped at MAX_RUN.
        let repeat = input[pos..]
            .iter()
            .take(MAX_RUN)
            .take_while(|&&b| b == head)
            .count();

        if repeat >= 3 {
            packed.push((1 - repeat as i32) as i8 as u8);
            packed.push(head);
            pos += repeat;
            continue;
        }

        // Grow the literal one byte at a time until it is full, the input
        // ends, or the next three bytes form a run worth encoding separately.
        let limit = (input.len() - pos).min(MAX_RUN);
        let mut lit_len = 1;
        while lit_len < limit {
            let at = pos + lit_len;
            let triple_ahead =
                matches!(input.get(at..at + 3), Some([a, b, c]) if a == b && b == c);
            if triple_ahead {
                break;
            }
            lit_len += 1;
        }

        packed.push((lit_len - 1) as u8);
        packed.extend_from_slice(&input[pos..pos + lit_len]);
        pos += lit_len;
    }
    packed
}
/// Number of slots in each encoder trie array: one per possible code value.
const LZW_TRIE_SIZE: usize = (LZW_MAX_CODE as usize) + 1;
/// Link value meaning "no child" / "no further sibling". Code 0 can serve as
/// the sentinel because only codes handed to `insert` ever appear as links.
const LZW_TRIE_NONE: u16 = 0;
/// Encoder dictionary stored as a child/sibling trie in flat arrays indexed by
/// code value.
struct LzwTrie {
    /// Most recently inserted child of each code, or `LZW_TRIE_NONE`.
    first_child: [u16; LZW_TRIE_SIZE],
    /// Next entry in the same parent's child list, or `LZW_TRIE_NONE`.
    next_sibling: [u16; LZW_TRIE_SIZE],
    /// Byte that extends the parent's string to form this code's string.
    suffix: [u8; LZW_TRIE_SIZE],
}

impl LzwTrie {
    /// Creates an empty, heap-allocated trie.
    fn new() -> Box<Self> {
        let empty = Self {
            first_child: [0; LZW_TRIE_SIZE],
            next_sibling: [0; LZW_TRIE_SIZE],
            suffix: [0; LZW_TRIE_SIZE],
        };
        Box::new(empty)
    }

    /// Forgets every entry. Only the child heads are cleared: sibling links
    /// and suffix bytes are rewritten by `insert` before they can be reached.
    fn reset(&mut self) {
        self.first_child = [LZW_TRIE_NONE; LZW_TRIE_SIZE];
    }

    /// Finds the code for "string of `parent` followed by `byte`", if present,
    /// by walking `parent`'s child list.
    #[inline]
    fn lookup(&self, parent: u16, byte: u8) -> Option<u16> {
        let mut node = self.first_child[usize::from(parent)];
        loop {
            if node == LZW_TRIE_NONE {
                return None;
            }
            if self.suffix[usize::from(node)] == byte {
                return Some(node);
            }
            node = self.next_sibling[usize::from(node)];
        }
    }

    /// Registers `new_code` as "string of `parent` followed by `byte`",
    /// pushing it onto the front of `parent`'s child list.
    #[inline]
    fn insert(&mut self, parent: u16, byte: u8, new_code: u16) {
        let slot = usize::from(new_code);
        let head = &mut self.first_child[usize::from(parent)];
        self.next_sibling[slot] = *head;
        *head = new_code;
        self.suffix[slot] = byte;
    }
}
/// Encodes `input` as an MSB-first, variable-width (9 to 12 bit) LZW stream.
///
/// The stream always starts with a Clear code and ends with an
/// End-of-Information code, followed by zero padding up to a byte boundary.
/// The code width grows by one bit when the entry being inserted is
/// `2^width - 1` or larger, which keeps it in step with `unpack_lzw`. Once all
/// codes up to `LZW_MAX_CODE` are in use, the next dictionary miss emits a
/// Clear code and restarts with an empty dictionary at 9 bits.
pub fn pack_lzw(input: &[u8]) -> Vec<u8> {
    let mut trie = LzwTrie::new();
    let mut next_code: u16 = LZW_FIRST_CODE;
    let mut code_size: u32 = 9;
    let mut bit_buf: u64 = 0;
    let mut bit_count: u32 = 0;
    let mut out = Vec::new();

    // Appends the low `n` bits of `c` to the bit buffer and flushes every
    // complete byte, most significant bits first.
    let write_code = |c: u16, n: u32, bit_buf: &mut u64, bit_count: &mut u32, out: &mut Vec<u8>| {
        *bit_buf = (*bit_buf << n) | u64::from(c);
        *bit_count += n;
        while *bit_count >= 8 {
            let shift = *bit_count - 8;
            out.push(((*bit_buf >> shift) & 0xFF) as u8);
            *bit_count -= 8;
            // Keep only the bits that have not been written out yet.
            *bit_buf &= (1u64 << *bit_count) - 1;
        }
    };

    write_code(
        LZW_CLEAR_CODE,
        code_size,
        &mut bit_buf,
        &mut bit_count,
        &mut out,
    );

    if let Some((&first, rest)) = input.split_first() {
        // `current` is the code of the longest dictionary string matching the
        // bytes consumed since the last emitted code.
        let mut current = u16::from(first);
        for &b in rest {
            if let Some(c) = trie.lookup(current, b) {
                current = c;
                continue;
            }
            write_code(current, code_size, &mut bit_buf, &mut bit_count, &mut out);
            if next_code <= LZW_MAX_CODE {
                trie.insert(current, b, next_code);
                if code_size < 12 && next_code >= (1u16 << code_size) - 1 {
                    code_size += 1;
                }
                next_code += 1;
            } else {
                // Dictionary exhausted: tell the decoder to start over.
                write_code(
                    LZW_CLEAR_CODE,
                    code_size,
                    &mut bit_buf,
                    &mut bit_count,
                    &mut out,
                );
                trie.reset();
                next_code = LZW_FIRST_CODE;
                code_size = 9;
            }
            current = u16::from(b);
        }
        write_code(current, code_size, &mut bit_buf, &mut bit_count, &mut out);
        // After consuming this last data code the decoder adds one more table
        // entry and may widen its code size before it reads EOI. Apply the
        // same widening here so EOI is written at the width the decoder will
        // read it with; otherwise a reader that relies on EOI misparses it
        // whenever the final code lands on a width boundary.
        if code_size < 12 && next_code >= (1u16 << code_size) - 1 {
            code_size += 1;
        }
    }

    write_code(
        LZW_EOI_CODE,
        code_size,
        &mut bit_buf,
        &mut bit_count,
        &mut out,
    );

    // Left-align any leftover bits in a final zero-padded byte.
    if bit_count > 0 {
        out.push(((bit_buf << (8 - bit_count)) & 0xFF) as u8);
    }
    out
}
pub fn pack_deflate(input: &[u8]) -> Result<Vec<u8>> {
compcol::vec::compress_to_vec::<compcol::zlib::Zlib>(input)
.map_err(|e| Error::invalid(format!("TIFF/Deflate: zlib deflate failed: {e:?}")))
}
pub fn pack_zstd(input: &[u8]) -> Result<Vec<u8>> {
compcol::vec::compress_to_vec::<compcol::zstd::Zstd>(input)
.map_err(|e| Error::invalid(format!("TIFF/ZSTD: Zstandard frame encode failed: {e:?}")))
}
// Unit tests for the codecs above: encode/decode round trips for PackBits,
// LZW, Deflate and Zstandard, plus checks for malformed input and for the
// output/allocation bounds.
#[cfg(test)]
mod tests {
use super::*;
// A 200-byte run is longer than one replicate packet can describe, so this
// exercises splitting a run across packets.
#[test]
fn packbits_roundtrip_replicates() {
let src = vec![0xAA; 200]; let p = pack_packbits(&src);
let back = unpack_packbits(&p, src.len()).unwrap();
assert_eq!(back, src);
}
// Strictly increasing bytes contain no runs, so only literal packets are used.
#[test]
fn packbits_roundtrip_literals() {
let src: Vec<u8> = (0..50).collect();
let p = pack_packbits(&src);
let back = unpack_packbits(&p, src.len()).unwrap();
assert_eq!(back, src);
}
// Run, then literal, then run: checks the transitions between packet kinds.
#[test]
fn packbits_mixed() {
let mut src = vec![0u8; 10];
src.extend_from_slice(&[1, 2, 3, 4, 5]);
src.extend(std::iter::repeat(0xFF).take(20));
let p = pack_packbits(&src);
let back = unpack_packbits(&p, src.len()).unwrap();
assert_eq!(back, src);
}
// 0x80 is the header value -128, which the decoder skips; the following
// 0x00 header introduces a one-byte literal.
#[test]
fn packbits_noop_byte_skipped() {
let input = vec![0x80, 0x00, b'A']; let out = unpack_packbits(&input, 1).unwrap();
assert_eq!(out, b"A");
}
// Short text with repeated substrings, so table entries get reused.
#[test]
fn lzw_roundtrip_short() {
let src = b"TOBEORNOTTOBEORTOBEORNOT".to_vec();
let encoded = pack_lzw(&src);
let back = unpack_lzw(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// A constant input makes the encoder emit codes that refer to the entry the
// decoder is just about to create (the `code == next_code` branch).
#[test]
fn lzw_roundtrip_zeros() {
let src = vec![0u8; 4096];
let encoded = pack_lzw(&src);
let back = unpack_lzw(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// The first nine bits of this input form the code 258, which is not a
// single-byte root and therefore cannot start a stream.
#[test]
fn lzw_rejects_first_code_above_leaf_range() {
let err = unpack_lzw(&[0x81, 0x00, 0x00], 64).unwrap_err();
let msg = format!("{err:?}");
assert!(
msg.contains("first code after Clear"),
"unexpected error: {msg}"
);
}
// A huge `expected_len` must not translate into a huge up-front allocation;
// with empty input both decoders simply return an empty buffer.
#[test]
fn lzw_initial_reserve_is_bounded() {
let out = unpack_lzw(&[], usize::MAX / 2).unwrap();
assert!(out.is_empty());
let out = unpack_packbits(&[], usize::MAX / 2).unwrap();
assert!(out.is_empty());
}
// Round trip where the output limit equals the real decompressed size.
#[test]
fn deflate_output_is_bounded() {
let src = vec![0u8; 1024];
let encoded = pack_deflate(&src).unwrap();
let back = unpack_deflate(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// Data that inflates past the requested limit must be rejected.
#[test]
fn deflate_cap_exceeded_errors() {
let src = vec![0u8; 4096];
let encoded = pack_deflate(&src).unwrap();
assert!(unpack_deflate(&encoded, 16).is_err());
}
// Also checks that the encoded data starts with the Zstandard frame magic.
#[test]
fn zstd_roundtrip_zeros() {
let src = vec![0u8; 4096];
let encoded = pack_zstd(&src).unwrap();
assert_eq!(&encoded[..4], &[0x28, 0xB5, 0x2F, 0xFD]);
let back = unpack_zstd(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// Pseudo-random bytes from a xorshift generator with a fixed seed, so the
// test data is deterministic.
#[test]
fn zstd_roundtrip_random_8k() {
let mut s: u32 = 0x2545_F491;
let src: Vec<u8> = (0..8192)
.map(|_| {
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
s as u8
})
.collect();
let encoded = pack_zstd(&src).unwrap();
let back = unpack_zstd(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// Highly repetitive text-like input.
#[test]
fn zstd_roundtrip_textlike() {
let motif = b"row of sample bytes 01234567 ";
let mut src = Vec::with_capacity(motif.len() * 512);
for _ in 0..512 {
src.extend_from_slice(motif);
}
let encoded = pack_zstd(&src).unwrap();
let back = unpack_zstd(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// Data that decodes past the requested limit must be rejected.
#[test]
fn zstd_cap_exceeded_errors() {
let src = vec![0u8; 4096];
let encoded = pack_zstd(&src).unwrap();
assert!(unpack_zstd(&encoded, 16).is_err());
}
// Wrong magic, a bare magic number with nothing after it, and empty input
// must all fail rather than yield data.
#[test]
fn zstd_garbage_input_errors() {
assert!(unpack_zstd(&[0xDE, 0xAD, 0xBE, 0xEF, 0x00], 64).is_err());
assert!(unpack_zstd(&[0x28, 0xB5, 0x2F, 0xFD], 64).is_err());
assert!(unpack_zstd(&[], 64).is_err());
}
// Pseudo-random input (fixed-seed xorshift) gives few repeated strings, so
// the table and the code width grow quickly.
#[test]
fn lzw_roundtrip_random_8k() {
let mut s: u32 = 0x9E37_79B9;
let src: Vec<u8> = (0..8192)
.map(|_| {
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
s as u8
})
.collect();
let encoded = pack_lzw(&src);
let back = unpack_lzw(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// 64 KiB of pseudo-random input is intended to fill the code table so the
// encoder's mid-stream Clear path is exercised.
#[test]
fn lzw_roundtrip_table_fill_clear() {
let mut s: u32 = 0xC0FF_EE17;
let src: Vec<u8> = (0..(64 * 1024))
.map(|_| {
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
s as u8
})
.collect();
let encoded = pack_lzw(&src);
let back = unpack_lzw(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// A 16-byte motif repeated 2048 times: long matches against the dictionary.
#[test]
fn lzw_roundtrip_byte_pattern_repeated() {
let motif = b"abcdefghijklmnop";
let mut src = Vec::with_capacity(motif.len() * 2048);
for _ in 0..2048 {
src.extend_from_slice(motif);
}
let encoded = pack_lzw(&src);
let back = unpack_lzw(&encoded, src.len()).unwrap();
assert_eq!(back, src);
}
// Direct check of the encoder trie: siblings under one parent, misses on
// unknown bytes and parents, and reuse of a code after `reset`.
#[test]
fn lzw_trie_lookup_and_insert() {
let mut t = LzwTrie::new();
t.insert(b'A' as u16, b'B', 258);
t.insert(b'A' as u16, b'C', 259);
assert_eq!(t.lookup(b'A' as u16, b'B'), Some(258));
assert_eq!(t.lookup(b'A' as u16, b'C'), Some(259));
assert_eq!(t.lookup(b'A' as u16, b'X'), None);
assert_eq!(t.lookup(b'Z' as u16, b'B'), None);
t.reset();
assert_eq!(t.lookup(b'A' as u16, b'B'), None);
t.insert(b'A' as u16, b'B', 258);
assert_eq!(t.lookup(b'A' as u16, b'B'), Some(258));
}
}