use zpdf_core::{Error, PdfDict, PdfName, PdfObject, Result};
/// Largest decoded size, in bytes (`1 << 30`, i.e. 1 GiB), that the
/// size-checked decoders in this file will produce before failing with a
/// "exceeds decompression limit" `StreamDecode` error.
const MAX_DECODED_OUTPUT: usize = 1 << 30;
/// Decodes a stream body by running the filter chain named in its dictionary.
///
/// `Filter` may be a single name or an array of names. Filters run in the
/// order listed; after each one, the predictor described by that filter's
/// decode parameters (when it has any) is undone.
///
/// Returns a copy of `data` when the dictionary has no `Filter` entry.
///
/// # Errors
///
/// Returns `Error::TypeMismatch` when `Filter` is neither a name nor an array
/// of names, and forwards any error produced by a filter or predictor.
pub fn decode_stream(data: &[u8], dict: &PdfDict) -> Result<Vec<u8>> {
    // Without a filter chain the bytes are already in decoded form.
    let Some(filter_entry) = dict.get("Filter") else {
        return Ok(data.to_vec());
    };

    let filters = match filter_entry {
        PdfObject::Name(name) => vec![name.clone()],
        PdfObject::Array(items) => {
            let mut names = Vec::new();
            for item in items.iter() {
                let PdfObject::Name(name) = item else {
                    return Err(Error::TypeMismatch {
                        expected: "Name",
                        actual: item.type_name(),
                    });
                };
                names.push(name.clone());
            }
            names
        }
        _ => {
            return Err(Error::TypeMismatch {
                expected: "Name or Array",
                actual: "other",
            })
        }
    };

    // One (possibly absent) parameter dictionary per filter, in filter order.
    let decode_parms = extract_decode_parms(dict, filters.len());

    let mut decoded = data.to_vec();
    for (filter, parms) in filters.iter().zip(&decode_parms) {
        let parms = parms.as_ref();
        decoded = apply_filter(filter, &decoded, parms)?;
        if let Some(p) = parms {
            decoded = apply_predictor(&decoded, p)?;
        }
    }
    Ok(decoded)
}
/// Builds the per-filter list of decode-parameter dictionaries.
///
/// Reads `DecodeParms`, falling back to the abbreviated `DP` key. The result
/// always has exactly `filter_count` slots:
///
/// * a single dictionary is assigned to the first filter only;
/// * an array is matched to filters by position, with non-dictionary items
///   and missing positions becoming `None` and surplus items ignored;
/// * anything else (or no entry at all) yields `None` for every filter.
fn extract_decode_parms(dict: &PdfDict, filter_count: usize) -> Vec<Option<PdfDict>> {
    let mut slots: Vec<Option<PdfDict>> = vec![None; filter_count];

    match dict.get("DecodeParms").or_else(|| dict.get("DP")) {
        Some(PdfObject::Dict(single)) => {
            if let Some(first) = slots.first_mut() {
                *first = Some(single.clone());
            }
        }
        Some(PdfObject::Array(items)) => {
            // `zip` stops at the shorter side, so extra array items are dropped
            // and filters without a matching item keep their `None`.
            for (slot, item) in slots.iter_mut().zip(items.iter()) {
                if let PdfObject::Dict(parms) = item {
                    *slot = Some(parms.clone());
                }
            }
        }
        _ => {}
    }

    slots
}
/// Undoes the predictor described by a filter's decode parameters.
///
/// * `Predictor` 1 (the default when the key is missing or unreadable) and any
///   value that is neither 2 nor at least 10 return a copy of `data`.
/// * `Predictor` 2 applies the TIFF predictor.
/// * `Predictor` 10 and above applies the PNG predictors.
///
/// `Colors`, `BitsPerComponent` and `Columns` default to 1, 8 and 1 and are
/// clamped to at least 1.
///
/// # Errors
///
/// Forwards any error from the selected predictor decoder.
fn apply_predictor(data: &[u8], params: &PdfDict) -> Result<Vec<u8>> {
    // Compare the raw i64: narrowing to u32 first would wrap negative or
    // oversized values onto some unrelated predictor code (e.g. -1 would land
    // in the PNG range) and corrupt data that needs no prediction.
    let predictor = params.get_i64("Predictor").unwrap_or(1);
    if predictor != 2 && predictor < 10 {
        return Ok(data.to_vec());
    }

    let colors = params.get_i64("Colors").unwrap_or(1).max(1) as usize;
    let bpc = params.get_i64("BitsPerComponent").unwrap_or(8).max(1) as usize;
    let columns = params.get_i64("Columns").unwrap_or(1).max(1) as usize;

    if predictor == 2 {
        decode_tiff_predictor(data, colors, bpc, columns)
    } else {
        decode_png_predictor(data, colors, bpc, columns)
    }
}
/// Undoes TIFF predictor 2 (horizontal differencing) for 8-bit samples.
///
/// Each row holds `columns * colors` bytes; every byte after the first pixel
/// of a row is replaced by the wrapping sum of itself and the byte `colors`
/// positions to its left. A short final row is decoded as far as it goes.
///
/// Only `bpc == 8` is handled; other depths are returned unchanged. Degenerate
/// geometry (`colors == 0` or `columns == 0`) also returns the data unchanged
/// rather than panicking, and a row size that would overflow `usize` is
/// treated as a single row spanning all of `data`.
fn decode_tiff_predictor(
    data: &[u8],
    colors: usize,
    bpc: usize,
    columns: usize,
) -> Result<Vec<u8>> {
    let mut output = data.to_vec();
    if bpc != 8 {
        return Ok(output);
    }

    // Saturate instead of overflowing; a zero row size has nothing to predict
    // (and `chunks_mut(0)` would panic).
    let row_bytes = columns.saturating_mul(colors);
    if row_bytes == 0 {
        return Ok(output);
    }

    for row in output.chunks_mut(row_bytes) {
        for i in colors..row.len() {
            row[i] = row[i].wrapping_add(row[i - colors]);
        }
    }
    Ok(output)
}
fn decode_png_predictor(data: &[u8], colors: usize, bpc: usize, columns: usize) -> Result<Vec<u8>> {
let row_bytes = (colors * bpc * columns).div_ceil(8);
let bpp = (colors * bpc).div_ceil(8); let stride = 1 + row_bytes;
if !data.len().is_multiple_of(stride) && !data.is_empty() {
tracing::debug!(
"PNG predictor: data length {} not multiple of stride {stride}",
data.len()
);
}
let num_rows = data.len().div_ceil(stride);
let mut output = Vec::with_capacity(num_rows * row_bytes);
let mut prev_row = vec![0u8; row_bytes];
let mut pos = 0;
while pos < data.len() {
if pos >= data.len() {
break;
}
let filter_type = data[pos];
pos += 1;
let available = (data.len() - pos).min(row_bytes);
let cur = &data[pos..pos + available];
let mut row = vec![0u8; row_bytes];
row[..available].copy_from_slice(cur);
match filter_type {
0 => {} 1 => {
for i in bpp..row_bytes {
row[i] = row[i].wrapping_add(row[i - bpp]);
}
}
2 => {
for i in 0..row_bytes {
row[i] = row[i].wrapping_add(prev_row[i]);
}
}
3 => {
for i in 0..row_bytes {
let left = if i >= bpp { row[i - bpp] as u16 } else { 0 };
let above = prev_row[i] as u16;
row[i] = row[i].wrapping_add(((left + above) / 2) as u8);
}
}
4 => {
for i in 0..row_bytes {
let left = if i >= bpp { row[i - bpp] as i32 } else { 0 };
let above = prev_row[i] as i32;
let upper_left = if i >= bpp {
prev_row[i - bpp] as i32
} else {
0
};
row[i] = row[i].wrapping_add(paeth(left, above, upper_left));
}
}
_ => {
tracing::debug!("PNG predictor: unknown filter type {filter_type}");
}
}
output.extend_from_slice(&row);
prev_row.copy_from_slice(&row);
pos += available;
}
Ok(output)
}
/// Paeth predictor: picks whichever of `a` (left), `b` (above) or `c`
/// (upper-left) lies closest to the estimate `a + b - c`.
///
/// Ties are resolved in the order `a`, then `b`, then `c`. The winner is
/// returned truncated to a byte.
fn paeth(a: i32, b: i32, c: i32) -> u8 {
    let estimate = a + b - c;
    let distance = |candidate: i32| (estimate - candidate).abs();
    let (da, db, dc) = (distance(a), distance(b), distance(c));

    let chosen = if da <= db && da <= dc {
        a
    } else if db <= dc {
        b
    } else {
        c
    };
    chosen as u8
}
fn apply_filter(filter: &PdfName, data: &[u8], params: Option<&PdfDict>) -> Result<Vec<u8>> {
match filter.as_str() {
"FlateDecode" | "Fl" => decode_flate(data),
"LZWDecode" | "LZW" => {
let early_change = params
.and_then(|p| p.get_i64("EarlyChange").ok())
.unwrap_or(1);
lzw_decode(data, early_change)
}
"ASCIIHexDecode" | "AHx" => decode_ascii_hex(data),
"ASCII85Decode" | "A85" => decode_ascii85(data),
"RunLengthDecode" | "RL" => decode_run_length(data),
"DCTDecode" | "DCT" => decode_dct(data),
"CCITTFaxDecode" | "CCF" => {
let ccitt_params = crate::ccitt::CcittParams::from_dict(params);
crate::ccitt::decode(data, &ccitt_params)
}
"JBIG2Decode" => {
let jbig2_params = crate::jbig2::Jbig2Params::from_dict(params);
crate::jbig2::decode(data, &jbig2_params)
}
"JPXDecode" => Ok(data.to_vec()),
other => Err(Error::UnsupportedFilter(other.to_string())),
}
}
/// Decodes an LZW stream with MSB-first, variable-width (9 to 12 bit) codes.
///
/// Code 256 clears the table and code 257 ends the data; decoding also stops
/// quietly when the input runs out before an end-of-data code is seen.
///
/// `early_change` selects when the code width grows: any non-zero value
/// switches one code early, `0` switches only once the table size reaches the
/// current width's capacity.
///
/// The string table is capped at 4096 entries, the most a 12-bit code can
/// address. Entries past that point could never be referenced, so dropping
/// them does not change the output, but it keeps memory bounded on streams
/// that never emit a clear code.
///
/// # Errors
///
/// Returns `Error::StreamDecode` for a code that refers past the next free
/// table slot, for the "next slot" code appearing before any literal, and
/// when the output grows beyond `MAX_DECODED_OUTPUT`.
fn lzw_decode(data: &[u8], early_change: i64) -> Result<Vec<u8>> {
    const CLEAR: u32 = 256;
    const EOD: u32 = 257;
    const MAX_WIDTH: u32 = 12;
    const MAX_TABLE_LEN: usize = 1 << MAX_WIDTH;

    let early = u32::from(early_change != 0);

    let mut table: Vec<Vec<u8>> = Vec::with_capacity(MAX_TABLE_LEN);
    // Restores the 256 single-byte entries plus two placeholders that occupy
    // the CLEAR and EOD code slots.
    let reset = |t: &mut Vec<Vec<u8>>| {
        t.clear();
        for i in 0..256u32 {
            t.push(vec![i as u8]);
        }
        t.push(Vec::new());
        t.push(Vec::new());
    };
    reset(&mut table);

    let mut width: u32 = 9;
    let mut bit_pos: usize = 0;
    let total_bits = data.len() * 8;
    // Reads `width` bits MSB-first; `None` once fewer than `width` bits remain.
    let read_code = |bit_pos: &mut usize, width: u32| -> Option<u32> {
        if *bit_pos + width as usize > total_bits {
            return None;
        }
        let mut code: u32 = 0;
        for _ in 0..width {
            let byte = data[*bit_pos / 8];
            let bit = (byte >> (7 - (*bit_pos % 8))) & 1;
            code = (code << 1) | bit as u32;
            *bit_pos += 1;
        }
        Some(code)
    };

    let mut out: Vec<u8> = Vec::new();
    let mut prev: Option<u32> = None;
    while let Some(code) = read_code(&mut bit_pos, width) {
        if code == EOD {
            break;
        }
        if code == CLEAR {
            reset(&mut table);
            width = 9;
            prev = None;
            continue;
        }

        let entry: Vec<u8> = if (code as usize) < table.len() {
            table[code as usize].clone()
        } else if code as usize == table.len() {
            // The code names the entry that is about to be created: it is the
            // previous string followed by its own first byte.
            match prev {
                Some(p) => {
                    let mut e = table[p as usize].clone();
                    e.push(table[p as usize][0]);
                    e
                }
                None => {
                    return Err(Error::StreamDecode(format!(
                        "LZWDecode: code {code} before any literal"
                    )))
                }
            }
        } else {
            return Err(Error::StreamDecode(format!(
                "LZWDecode: invalid code {code} (table size {})",
                table.len()
            )));
        };

        out.extend_from_slice(&entry);
        if out.len() > MAX_DECODED_OUTPUT {
            return Err(Error::StreamDecode(
                "LZWDecode: output exceeds decompression limit".into(),
            ));
        }

        if let Some(p) = prev {
            // A full table stays frozen until the next clear code.
            if table.len() < MAX_TABLE_LEN {
                let mut new_entry = table[p as usize].clone();
                new_entry.push(entry[0]);
                table.push(new_entry);
            }
        }
        prev = Some(code);

        let next_code = table.len() as u32;
        if width < MAX_WIDTH && next_code + early >= (1u32 << width) {
            width += 1;
        }
    }
    Ok(out)
}
/// What happened when a decompressor was drained by `inflate_chunked`.
enum InflateOutcome {
/// The reader reported end of stream; holds everything that was read.
Complete(Vec<u8>),
/// A read failed; holds the bytes read before the failure and the error text.
Failed(Vec<u8>, String),
}
/// Drains `reader` in 16 KiB chunks while enforcing `MAX_DECODED_OUTPUT`.
///
/// Returns `InflateOutcome::Complete` with all bytes once the reader reports
/// end of stream, or `InflateOutcome::Failed` with the bytes gathered so far
/// and the error text when a read fails, so callers can salvage partial data.
///
/// A read that fails with `ErrorKind::Interrupted` is retried rather than
/// reported as a failure, as the `std::io::Read` contract asks.
///
/// # Errors
///
/// Returns `Error::StreamDecode` when the accumulated output would exceed
/// `MAX_DECODED_OUTPUT`.
fn inflate_chunked(mut reader: impl std::io::Read) -> Result<InflateOutcome> {
    let mut out = Vec::new();
    let mut buf = [0u8; 16 * 1024];
    loop {
        match reader.read(&mut buf) {
            Ok(0) => return Ok(InflateOutcome::Complete(out)),
            Ok(n) => {
                if out.len() + n > MAX_DECODED_OUTPUT {
                    return Err(Error::StreamDecode(
                        "FlateDecode: output exceeds decompression limit".into(),
                    ));
                }
                out.extend_from_slice(&buf[..n]);
            }
            // Non-fatal by contract: simply try the read again.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Ok(InflateOutcome::Failed(out, e.to_string())),
        }
    }
}
fn decode_flate(data: &[u8]) -> Result<Vec<u8>> {
use flate2::read::{DeflateDecoder, ZlibDecoder};
if data.is_empty() {
return Ok(Vec::new());
}
let plausible_zlib = |i: usize| {
data.len() >= i + 2
&& data[i] & 0x0f == 8
&& ((data[i] as u32) << 8 | data[i + 1] as u32).is_multiple_of(31)
};
let mut zlib_err: Option<String> = None;
if plausible_zlib(0) {
match inflate_chunked(ZlibDecoder::new(data))? {
InflateOutcome::Complete(out) => return Ok(out),
InflateOutcome::Failed(partial, err) if !partial.is_empty() => {
tracing::warn!(
"FlateDecode: zlib stream failed after {} bytes ({err}); keeping partial output",
partial.len()
);
return Ok(partial);
}
InflateOutcome::Failed(_, err) => zlib_err = Some(err),
}
}
const MAX_HEADER_SCAN: usize = 64;
if let Some(k) = (1..data.len().min(MAX_HEADER_SCAN)).find(|&k| plausible_zlib(k)) {
match inflate_chunked(ZlibDecoder::new(&data[k..]))? {
InflateOutcome::Complete(out) => {
tracing::warn!("FlateDecode: skipped {k} bytes of leading garbage");
return Ok(out);
}
InflateOutcome::Failed(partial, err) if !partial.is_empty() => {
tracing::warn!(
"FlateDecode: zlib stream at offset {k} failed ({err}); keeping {} partial bytes",
partial.len()
);
return Ok(partial);
}
InflateOutcome::Failed(..) => {}
}
}
match inflate_chunked(DeflateDecoder::new(data))? {
InflateOutcome::Complete(out) => {
tracing::warn!("FlateDecode: decoded as raw deflate (missing zlib header)");
Ok(out)
}
InflateOutcome::Failed(partial, err) if !partial.is_empty() => {
tracing::warn!(
"FlateDecode: raw deflate failed ({err}); keeping {} partial bytes",
partial.len()
);
Ok(partial)
}
InflateOutcome::Failed(_, err) => Err(Error::StreamDecode(format!(
"FlateDecode: {}",
zlib_err.unwrap_or(err)
))),
}
}
/// Decodes ASCIIHexDecode data: pairs of hex digits, terminated by `>`.
///
/// Whitespace and NUL bytes are skipped. Any other non-hex byte is ignored
/// and counted, with a single warning logged at the end. Everything after the
/// first `>` is ignored. A final unpaired digit is treated as the high nibble
/// of a byte whose low nibble is zero.
fn decode_ascii_hex(data: &[u8]) -> Result<Vec<u8>> {
    let mut decoded = Vec::with_capacity(data.len() / 2);
    let mut pending: Option<u8> = None;
    let mut stray = 0usize;

    for byte in data.iter().copied().take_while(|&b| b != b'>') {
        let digit = match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            b'A'..=b'F' => byte - b'A' + 10,
            0 => continue,
            _ if byte.is_ascii_whitespace() => continue,
            _ => {
                stray += 1;
                continue;
            }
        };
        match pending.take() {
            Some(high) => decoded.push((high << 4) | digit),
            None => pending = Some(digit),
        }
    }

    if let Some(high) = pending {
        decoded.push(high << 4);
    }
    if stray > 0 {
        tracing::warn!("ASCIIHexDecode: ignored {stray} invalid byte(s)");
    }
    Ok(decoded)
}
/// Decodes ASCII85Decode data: groups of five base-85 digits (`!`..=`u`)
/// become four bytes, terminated by `~`.
///
/// Whitespace and NUL bytes are skipped. `z` at a group boundary expands to
/// four zero bytes. Any other byte outside the digit range (including a `z`
/// in the middle of a group) is ignored and counted, with a single warning
/// logged at the end. A group whose value does not fit in 32 bits is
/// truncated rather than rejected.
///
/// A final partial group of `n >= 2` digits is padded with `u` digits and
/// yields `n - 1` bytes; a single leftover digit is dropped.
fn decode_ascii85(data: &[u8]) -> Result<Vec<u8>> {
    let mut decoded = Vec::new();
    let mut group: u64 = 0;
    let mut digits = 0usize;
    let mut stray = 0usize;

    for &byte in data {
        match byte {
            b'~' => break,
            0 => {}
            _ if byte.is_ascii_whitespace() => {}
            b'z' if digits == 0 => decoded.extend_from_slice(&[0u8; 4]),
            b'!'..=b'u' => {
                group = group * 85 + u64::from(byte - b'!');
                digits += 1;
                if digits == 5 {
                    // Keep only the low 32 bits of an overflowing group.
                    decoded.extend_from_slice(&(group as u32).to_be_bytes());
                    group = 0;
                    digits = 0;
                }
            }
            _ => stray += 1,
        }
    }

    if digits > 1 {
        // Pad the missing digits with the highest digit value (84, i.e. `u`).
        let padded = (digits..5).fold(group, |acc, _| acc * 85 + 84);
        let word = (padded as u32).to_be_bytes();
        decoded.extend_from_slice(&word[..digits - 1]);
    }
    if stray > 0 {
        tracing::warn!("ASCII85Decode: ignored {stray} invalid byte(s)");
    }
    Ok(decoded)
}
fn decode_dct(data: &[u8]) -> Result<Vec<u8>> {
use zune_jpeg::JpegDecoder;
if jpeg_is_adobe_ycck(data) {
use zune_jpeg::zune_core::colorspace::ColorSpace;
use zune_jpeg::zune_core::options::DecoderOptions;
let opts = DecoderOptions::default().jpeg_set_out_colorspace(ColorSpace::YCCK);
let mut decoder = JpegDecoder::new_with_options(std::io::Cursor::new(data), opts);
match decoder.decode() {
Ok(ycck) if decoder.output_colorspace() == Some(ColorSpace::YCCK) => {
return Ok(ycck_to_rgb(&ycck));
}
_ => {}
}
}
let mut decoder = JpegDecoder::new(std::io::Cursor::new(data));
decoder
.decode()
.map_err(|e| Error::StreamDecode(format!("DCTDecode: {e}")))
}
/// Converts interleaved 4-byte YCCK pixels to interleaved 3-byte RGB pixels.
///
/// For each pixel the Y/Cb/Cr part is turned into three intermediate channel
/// values clamped to `0..=255`; their complements (as fractions of 255),
/// together with the fourth byte scaled to `0..=1`, are passed to
/// `zpdf_color::cmyk_to_rgb` as C, M, Y and K. Trailing bytes that do not
/// form a whole pixel are ignored.
fn ycck_to_rgb(ycck: &[u8]) -> Vec<u8> {
    let mut rgb = Vec::with_capacity(ycck.len() / 4 * 3);

    for px in ycck.chunks_exact(4) {
        let luma = px[0] as f64;
        let cb_off = px[1] as f64 - 128.0;
        let cr_off = px[2] as f64 - 128.0;

        let rp = (luma + 1.402 * cr_off).clamp(0.0, 255.0);
        let gp = (luma - 0.344_136 * cb_off - 0.714_136 * cr_off).clamp(0.0, 255.0);
        let bp = (luma + 1.772 * cb_off).clamp(0.0, 255.0);

        let cyan = 1.0 - rp / 255.0;
        let magenta = 1.0 - gp / 255.0;
        let yellow = 1.0 - bp / 255.0;
        let black = px[3] as f64 / 255.0;

        let (r, g, b) = zpdf_color::cmyk_to_rgb(cyan, magenta, yellow, black);
        rgb.extend_from_slice(&[
            (r * 255.0).round() as u8,
            (g * 255.0).round() as u8,
            (b * 255.0).round() as u8,
        ]);
    }

    rgb
}
/// Reports whether a JPEG's headers describe an Adobe YCCK image.
///
/// Walks the marker segments after the first two bytes up to the
/// start-of-scan marker and returns `true` only when both hold:
///
/// * the last APP14 segment starting with `Adobe` has a transform byte of 2;
/// * the last frame header (SOF) declares four components.
///
/// Scanning stops quietly on a malformed or truncated segment. `0xFF` fill
/// bytes in front of a marker are skipped one at a time, so the marker that
/// follows them is still recognised.
fn jpeg_is_adobe_ycck(data: &[u8]) -> bool {
    let mut adobe_ycck = false;
    let mut four_components = false;

    // Start past the two-byte SOI marker.
    let mut pos = 2;
    while pos + 3 < data.len() {
        if data[pos] != 0xFF {
            pos += 1;
            continue;
        }
        let marker = data[pos + 1];
        if marker == 0xFF {
            // Fill byte: the next 0xFF may itself start the real marker, so
            // advance by one only.
            pos += 1;
            continue;
        }
        if marker == 0x01 || (0xD0..=0xD9).contains(&marker) {
            // Markers that carry no length field.
            pos += 2;
            continue;
        }

        // The big-endian length counts its own two bytes but not the marker.
        let seg_len = usize::from(u16::from_be_bytes([data[pos + 2], data[pos + 3]]));
        let seg_end = pos + 2 + seg_len;
        if seg_len < 2 || seg_end > data.len() {
            break;
        }
        let payload = &data[pos + 4..seg_end];

        match marker {
            // Start of scan: headers are over.
            0xDA => break,
            // APP14: the transform flag is the 12th payload byte.
            0xEE => {
                if payload.len() >= 12 && payload.starts_with(b"Adobe") {
                    adobe_ycck = payload[11] == 2;
                }
            }
            // DHT, JPG and DAC share the 0xC0..=0xCF range but are not frame headers.
            0xC4 | 0xC8 | 0xCC => {}
            // SOFn: the component count is the 6th payload byte.
            0xC0..=0xCF => {
                if payload.len() >= 6 {
                    four_components = payload[5] == 4;
                }
            }
            _ => {}
        }
        pos = seg_end;
    }

    adobe_ycck && four_components
}
fn decode_run_length(data: &[u8]) -> Result<Vec<u8>> {
let mut output = Vec::new();
let mut i = 0;
while i < data.len() {
let length_byte = data[i];
i += 1;
if length_byte == 128 {
break; } else if length_byte < 128 {
let count = length_byte as usize + 1;
if i + count > data.len() {
return Err(Error::StreamDecode("RunLengthDecode: truncated".into()));
}
if output.len() + count > MAX_DECODED_OUTPUT {
return Err(Error::StreamDecode(
"RunLengthDecode: output exceeds decompression limit".into(),
));
}
output.extend_from_slice(&data[i..i + count]);
i += count;
} else {
let count = 257 - length_byte as usize;
if i >= data.len() {
return Err(Error::StreamDecode("RunLengthDecode: truncated".into()));
}
if output.len() + count > MAX_DECODED_OUTPUT {
return Err(Error::StreamDecode(
"RunLengthDecode: output exceeds decompression limit".into(),
));
}
let byte = data[i];
i += 1;
output.resize(output.len() + count, byte);
}
}
Ok(output)
}
// Unit tests for the stream decoders in this file.
#[cfg(test)]
mod tests {
use super::*;
// --- YCCK -> RGB conversion and Adobe YCCK header detection ---
#[test]
fn ycck_white_decodes_white_not_black() {
let rgb = ycck_to_rgb(&[255, 128, 128, 0]);
assert_eq!(rgb, vec![255, 255, 255], "Adobe YCCK white must stay white");
}
#[test]
fn ycck_full_black_ink_decodes_near_black() {
let rgb = ycck_to_rgb(&[255, 128, 128, 255]);
assert_eq!(rgb, vec![44, 46, 53]);
}
#[test]
fn ycck_neutral_gray_via_polynomial() {
let rgb = ycck_to_rgb(&[255, 128, 128, 128]);
assert_eq!(rgb, vec![154, 156, 159]);
}
#[test]
fn ycck_colored_pixel_via_polynomial() {
let rgb = ycck_to_rgb(&[200, 100, 150, 50]);
assert_eq!(rgb, vec![198, 165, 131]);
}
// Builds a minimal header sequence: SOI, APP14 "Adobe" with transform 2,
// a frame header declaring 4 components, then SOS. Index 17 is the APP14
// transform byte, which the second half of the test clears.
#[test]
fn adobe_ycck_detection() {
let mut j = vec![0xFF, 0xD8];
j.extend_from_slice(&[0xFF, 0xEE, 0x00, 0x0E]);
j.extend_from_slice(b"Adobe");
j.extend_from_slice(&[0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x02]); j.extend_from_slice(&[0xFF, 0xC0, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x10, 0x04]);
j.extend_from_slice(&[1, 0x11, 0, 2, 0x11, 0, 3, 0x11, 0, 4, 0x11, 0]);
j.extend_from_slice(&[0xFF, 0xDA, 0x00, 0x02]); assert!(jpeg_is_adobe_ycck(&j));
let mut j0 = j.clone();
j0[17] = 0;
assert!(!jpeg_is_adobe_ycck(&j0));
}
// --- FlateDecode: round trip, partial salvage and lenient fallbacks ---
#[test]
fn flate_roundtrip() {
use flate2::write::ZlibEncoder;
use flate2::Compression;
use std::io::Write;
let original = b"Hello, zpdf! This is a test of FlateDecode.";
let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
encoder.write_all(original).unwrap();
let compressed = encoder.finish().unwrap();
let decoded = decode_flate(&compressed).unwrap();
assert_eq!(decoded, original);
}
// Compresses 64 KiB of pseudo-random bytes, cuts the compressed stream in
// half, and expects a non-empty prefix of the original back.
#[test]
fn flate_partial_salvage_on_truncation() {
use flate2::write::ZlibEncoder;
use flate2::Compression;
use std::io::Write;
let mut state = 0x2545F491u64;
let original: Vec<u8> = (0..64 * 1024)
.map(|_| {
state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
(state >> 33) as u8
})
.collect();
let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
encoder.write_all(&original).unwrap();
let compressed = encoder.finish().unwrap();
let truncated = &compressed[..compressed.len() / 2];
let decoded = decode_flate(truncated).unwrap();
assert!(!decoded.is_empty(), "partial output must be salvaged");
assert!(decoded.len() < original.len());
assert_eq!(
&original[..decoded.len()],
&decoded[..],
"salvaged bytes are a prefix"
);
}
#[test]
fn flate_raw_deflate_fallback() {
use flate2::write::DeflateEncoder;
use flate2::Compression;
use std::io::Write;
let original = b"raw deflate stream without a zlib wrapper".to_vec();
let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
encoder.write_all(&original).unwrap();
let compressed = encoder.finish().unwrap();
let decoded = decode_flate(&compressed).unwrap();
assert_eq!(decoded, original);
}
#[test]
fn flate_skips_leading_garbage() {
use flate2::write::ZlibEncoder;
use flate2::Compression;
use std::io::Write;
let original = b"zlib data behind a garbage prefix".to_vec();
let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
encoder.write_all(&original).unwrap();
let compressed = encoder.finish().unwrap();
let mut data = b"\r\n\xff".to_vec();
data.extend_from_slice(&compressed);
let decoded = decode_flate(&data).unwrap();
assert_eq!(decoded, original);
}
#[test]
fn flate_empty_input_is_empty_output() {
assert_eq!(decode_flate(&[]).unwrap(), Vec::<u8>::new());
}
#[test]
fn flate_garbage_still_errors() {
assert!(decode_flate(b"this is not compressed data at all....").is_err());
}
// --- ASCIIHexDecode ---
#[test]
fn ascii_hex() {
let decoded = decode_ascii_hex(b"48 65 6C 6C 6F>").unwrap();
assert_eq!(decoded, b"Hello");
}
#[test]
fn ascii_hex_tolerates_stray_bytes_and_data_after_eod() {
let decoded = decode_ascii_hex(b"48 65 x6C!6C 6F> trailing garbage \xff").unwrap();
assert_eq!(decoded, b"Hello");
}
// --- ASCII85Decode ---
#[test]
fn ascii85_basic() {
let decoded = decode_ascii85(b"9jqo^~>").unwrap();
assert_eq!(decoded, b"Man ");
}
#[test]
fn ascii85_ignores_bytes_after_eod() {
let decoded = decode_ascii85(b"9jqo^~> stray bytes \xff\xfe after EOD").unwrap();
assert_eq!(decoded, b"Man ");
}
#[test]
fn ascii85_skips_stray_bytes_and_whitespace() {
let decoded = decode_ascii85(b"9j\x00qo\xff ^~>").unwrap();
assert_eq!(decoded, b"Man ");
}
#[test]
fn ascii85_overflowing_group_does_not_panic() {
assert!(decode_ascii85(b"uuuuu~>").is_ok());
}
// --- RunLengthDecode ---
#[test]
fn run_length_literal_and_repeat() {
let data = [1, 0x41, 0x42, 254, 0x43, 128];
let decoded = decode_run_length(&data).unwrap();
assert_eq!(decoded, vec![0x41, 0x42, 0x43, 0x43, 0x43]);
}
// --- PNG and TIFF predictors ---
#[test]
fn png_predictor_none() {
let data = [0, 0x41, 0x42];
let result = decode_png_predictor(&data, 1, 8, 2).unwrap();
assert_eq!(result, vec![0x41, 0x42]);
}
#[test]
fn png_predictor_sub() {
let data = [1, 10, 5, 3];
let result = decode_png_predictor(&data, 1, 8, 3).unwrap();
assert_eq!(result, vec![10, 15, 18]);
}
#[test]
fn png_predictor_up() {
let data = [0, 10, 20, 2, 5, 3];
let result = decode_png_predictor(&data, 1, 8, 2).unwrap();
assert_eq!(result, vec![10, 20, 15, 23]);
}
#[test]
fn png_predictor_paeth() {
let data = [0, 10, 20, 30, 4, 5, 7, 3];
let result = decode_png_predictor(&data, 1, 8, 3).unwrap();
assert_eq!(result, vec![10, 20, 30, 15, 27, 33]);
}
#[test]
fn tiff_predictor_basic() {
let data = [100, 150, 200, 10, 20, 30];
let result = decode_tiff_predictor(&data, 3, 8, 2).unwrap();
assert_eq!(result, vec![100, 150, 200, 110, 170, 230]);
}
// --- LZWDecode ---
// The 9-byte sample below decodes to `-----A---B`.
#[test]
fn lzw_canonical_vector() {
let data = [0x80, 0x0B, 0x60, 0x50, 0x22, 0x0C, 0x0C, 0x85, 0x01];
let decoded = lzw_decode(&data, 1).unwrap();
assert_eq!(decoded, b"-----A---B");
}
// With no decode parameters, `apply_filter` passes EarlyChange = 1.
#[test]
fn lzw_via_apply_filter_default_early_change() {
let data = [0x80, 0x0B, 0x60, 0x50, 0x22, 0x0C, 0x0C, 0x85, 0x01];
let name = PdfName::new("LZWDecode");
let out = apply_filter(&name, &data, None).unwrap();
assert_eq!(out, b"-----A---B");
}
#[test]
fn lzw_stops_at_end_without_eod() {
let data = [0x80, 0x0B, 0x60, 0x50];
let out = lzw_decode(&data, 1).unwrap();
assert!(out.starts_with(b"-"));
}
#[test]
fn lzw_empty_input() {
assert_eq!(lzw_decode(&[], 1).unwrap(), Vec::<u8>::new());
}
// Reference encoder used by the round-trip tests: MSB-first, 8-bit symbols,
// using the TIFF size-switch variant unless `early_change` is 0.
fn weezl_encode(data: &[u8], early_change: i64) -> Vec<u8> {
use weezl::{encode::Encoder, BitOrder};
let mut enc = if early_change == 0 {
Encoder::new(BitOrder::Msb, 8)
} else {
Encoder::with_tiff_size_switch(BitOrder::Msb, 8)
};
enc.encode(data).expect("weezl encode")
}
// Round-trips inputs of several lengths, for both EarlyChange settings.
#[test]
fn lzw_roundtrip_against_weezl() {
for ec in [1i64, 0] {
for &len in &[0usize, 1, 300, 600, 1200, 3000, 5000, 9000] {
let input: Vec<u8> = (0..len).map(|i| ((i * 7 + i / 11) % 251) as u8).collect();
let encoded = weezl_encode(&input, ec);
let decoded = lzw_decode(&encoded, ec).unwrap();
assert_eq!(decoded, input, "ec={ec} len={len}");
}
}
}
#[test]
fn lzw_single_byte_run_roundtrip_against_weezl() {
let input = vec![b'A'; 5000];
for ec in [1i64, 0] {
let encoded = weezl_encode(&input, ec);
assert_eq!(lzw_decode(&encoded, ec).unwrap(), input, "ec={ec}");
}
}
}