use crate::encodings;
use crate::encodings::Encoding;
use crate::encodings::cmap::ToUnicodeCMap;
use crate::error::DecompressError;
use crate::{Document, Error, Result};
use indexmap::IndexMap;
use log::warn;
use std::cmp::max;
use std::fmt;
use std::str;
const MAX_DECOMPRESSED_BYTES: usize = 256 * 1024 * 1024;
pub type ObjectId = (u32, u16);
#[derive(Clone, Default, PartialEq)]
pub struct Dictionary(IndexMap<Vec<u8>, Object>);
#[derive(Debug, Clone, PartialEq)]
pub struct Stream {
pub dict: Dictionary,
pub content: Vec<u8>,
pub allows_compression: bool,
pub start_position: Option<usize>,
}
#[derive(Clone, PartialEq)]
pub enum Object {
Null,
Boolean(bool),
Integer(i64),
Real(f32),
Name(Vec<u8>),
String(Vec<u8>, StringFormat),
Array(Vec<Object>),
Dictionary(Dictionary),
Stream(Stream),
Reference(ObjectId),
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum StringFormat {
#[default]
Literal,
Hexadecimal,
}
impl From<bool> for Object {
fn from(value: bool) -> Self {
Object::Boolean(value)
}
}
impl From<i64> for Object {
fn from(number: i64) -> Self {
Object::Integer(number)
}
}
macro_rules! from_smaller_ints {
($( $Int: ty )+) => {
$(
impl From<$Int> for Object {
fn from(number: $Int) -> Self {
Object::Integer(i64::from(number))
}
}
)+
}
}
from_smaller_ints! {
i8 i16 i32
u8 u16 u32
}
impl From<f64> for Object {
fn from(number: f64) -> Self {
Object::Real(number as f32)
}
}
impl From<f32> for Object {
fn from(number: f32) -> Self {
Object::Real(number)
}
}
impl From<String> for Object {
fn from(name: String) -> Self {
Object::Name(name.into_bytes())
}
}
impl<'a> From<&'a str> for Object {
fn from(name: &'a str) -> Self {
Object::Name(name.as_bytes().to_vec())
}
}
impl From<Vec<Object>> for Object {
fn from(array: Vec<Object>) -> Self {
Object::Array(array)
}
}
impl From<Dictionary> for Object {
fn from(dict: Dictionary) -> Self {
Object::Dictionary(dict)
}
}
impl From<Stream> for Object {
fn from(stream: Stream) -> Self {
Object::Stream(stream)
}
}
impl From<ObjectId> for Object {
fn from(id: ObjectId) -> Self {
Object::Reference(id)
}
}
impl Object {
pub fn string_literal<S: Into<Vec<u8>>>(s: S) -> Self {
Object::String(s.into(), StringFormat::Literal)
}
pub fn is_null(&self) -> bool {
matches!(*self, Object::Null)
}
pub fn as_bool(&self) -> Result<bool> {
match self {
Object::Boolean(value) => Ok(*value),
_ => Err(Error::ObjectType {
expected: "Boolean",
found: self.enum_variant(),
}),
}
}
pub fn as_i64(&self) -> Result<i64> {
match self {
Object::Integer(value) => Ok(*value),
_ => Err(Error::ObjectType {
expected: "Integer",
found: self.enum_variant(),
}),
}
}
pub fn as_f32(&self) -> Result<f32> {
match self {
Object::Real(value) => Ok(*value),
_ => Err(Error::ObjectType {
expected: "Real",
found: self.enum_variant(),
}),
}
}
pub fn as_float(&self) -> Result<f32> {
match self {
Object::Integer(value) => Ok(*value as f32),
Object::Real(value) => Ok(*value),
_ => Err(Error::ObjectType {
expected: "Integer or Real",
found: self.enum_variant(),
}),
}
}
pub fn as_name(&self) -> Result<&[u8]> {
match self {
Object::Name(name) => Ok(name),
_ => Err(Error::ObjectType {
expected: "Name",
found: self.enum_variant(),
}),
}
}
pub fn as_str(&self) -> Result<&[u8]> {
match self {
Object::String(string, _) => Ok(string),
_ => Err(Error::ObjectType {
expected: "String",
found: self.enum_variant(),
}),
}
}
pub fn as_str_mut(&mut self) -> Result<&mut Vec<u8>> {
match self {
Object::String(string, _) => Ok(string),
_ => Err(Error::ObjectType {
expected: "String",
found: self.enum_variant(),
}),
}
}
pub fn as_reference(&self) -> Result<ObjectId> {
match self {
Object::Reference(id) => Ok(*id),
_ => Err(Error::ObjectType {
expected: "Reference",
found: self.enum_variant(),
}),
}
}
pub fn as_array(&self) -> Result<&Vec<Object>> {
match self {
Object::Array(arr) => Ok(arr),
_ => Err(Error::ObjectType {
expected: "Array",
found: self.enum_variant(),
}),
}
}
pub fn as_array_mut(&mut self) -> Result<&mut Vec<Object>> {
match self {
Object::Array(arr) => Ok(arr),
_ => Err(Error::ObjectType {
expected: "Array",
found: self.enum_variant(),
}),
}
}
pub fn as_dict(&self) -> Result<&Dictionary> {
match self {
Object::Dictionary(dict) => Ok(dict),
_ => Err(Error::ObjectType {
expected: "Dictionary",
found: self.enum_variant(),
}),
}
}
pub fn as_dict_mut(&mut self) -> Result<&mut Dictionary> {
match self {
Object::Dictionary(dict) => Ok(dict),
_ => Err(Error::ObjectType {
expected: "Dictionary",
found: self.enum_variant(),
}),
}
}
pub fn as_stream(&self) -> Result<&Stream> {
match self {
Object::Stream(stream) => Ok(stream),
_ => Err(Error::ObjectType {
expected: "Stream",
found: self.enum_variant(),
}),
}
}
pub fn as_stream_mut(&mut self) -> Result<&mut Stream> {
match self {
Object::Stream(stream) => Ok(stream),
_ => Err(Error::ObjectType {
expected: "Stream",
found: self.enum_variant(),
}),
}
}
pub fn type_name(&self) -> Result<&[u8]> {
match self {
Object::Dictionary(dict) => dict.get_type(),
Object::Stream(stream) => stream.dict.get_type(),
obj => Err(Error::ObjectType {
expected: "Dictionary or Stream",
found: obj.enum_variant(),
}),
}
}
pub fn enum_variant(&self) -> &'static str {
match self {
Object::Null => "Null",
Object::Boolean(_) => "Boolean",
Object::Integer(_) => "Integer",
Object::Real(_) => "Real",
Object::Name(_) => "Name",
Object::String(_, _) => "String",
Object::Array(_) => "Array",
Object::Dictionary(_) => "Dictionary",
Object::Stream(_) => "Stream",
Object::Reference(_) => "Reference",
}
}
}
impl fmt::Debug for Object {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Object::Null => write!(f, "Null"),
Object::Boolean(value) => write!(f, "{value}"),
Object::Integer(value) => write!(f, "{value}"),
Object::Real(value) => write!(f, "{value}"),
Object::Name(name) => write!(f, "/{}", String::from_utf8_lossy(name)),
Object::String(text, StringFormat::Literal) => {
write!(f, "({})", String::from_utf8_lossy(text))
}
Object::String(text, StringFormat::Hexadecimal) => {
write!(f, "<")?;
for b in text {
write!(f, "{b:02x}")?
}
write!(f, ">")
}
Object::Array(array) => {
let items = array
.iter()
.map(|item| format!("{item:?}"))
.collect::<Vec<String>>();
write!(f, "[{}]", items.join(" "))
}
Object::Dictionary(dict) => write!(f, "{dict:?}"),
Object::Stream(stream) => write!(f, "{:?}stream...endstream", stream.dict),
Object::Reference(id) => write!(f, "{} {} R", id.0, id.1),
}
}
}
impl Dictionary {
pub fn new() -> Dictionary {
Dictionary(IndexMap::new())
}
pub fn has(&self, key: &[u8]) -> bool {
self.0.contains_key(key)
}
pub fn get(&self, key: &[u8]) -> Result<&Object> {
self.0
.get(key)
.ok_or(Error::DictKey(String::from_utf8_lossy(key).to_string()))
}
pub fn get_deref<'a>(&'a self, key: &[u8], doc: &'a Document) -> Result<&'a Object> {
doc.dereference(self.get(key)?).map(|(_, object)| object)
}
pub fn get_mut(&mut self, key: &[u8]) -> Result<&mut Object> {
self.0
.get_mut(key)
.ok_or(Error::DictKey(String::from_utf8_lossy(key).to_string()))
}
pub fn set<K, V>(&mut self, key: K, value: V)
where
K: Into<Vec<u8>>,
V: Into<Object>,
{
self.0.insert(key.into(), value.into());
}
pub fn len(&self) -> usize {
self.0.len()
}
pub fn is_empty(&self) -> bool {
self.0.len() == 0
}
pub fn remove(&mut self, key: &[u8]) -> Option<Object> {
self.0.swap_remove(key)
}
pub fn has_type(&self, type_name: &[u8]) -> bool {
self.get(b"Type").and_then(|s| s.as_name()).ok() == Some(type_name)
}
pub fn get_type(&self) -> Result<&[u8]> {
self.get(b"Type")
.and_then(Object::as_name)
.or_else(|_| self.get(b"Linearized").and(Ok(b"Linearized")))
}
pub fn iter(&'_ self) -> indexmap::map::Iter<'_, Vec<u8>, Object> {
self.0.iter()
}
pub fn iter_mut(&'_ mut self) -> indexmap::map::IterMut<'_, Vec<u8>, Object> {
self.0.iter_mut()
}
pub fn get_font_encoding(&'_ self, doc: &Document) -> Result<Encoding<'_>> {
if !self.has_type(b"Font") {
return Err(Error::DictType {
expected: "Font",
found: String::from_utf8_lossy(self.get_type().unwrap_or(b"None")).to_string(),
});
}
match self.get(b"Encoding").and_then(Object::as_name) {
Ok(b"StandardEncoding") => Ok(Encoding::OneByteEncoding(&encodings::STANDARD_ENCODING)),
Ok(b"MacRomanEncoding") => {
Ok(Encoding::OneByteEncoding(&encodings::MAC_ROMAN_ENCODING))
}
Ok(b"MacExpertEncoding") => {
Ok(Encoding::OneByteEncoding(&encodings::MAC_EXPERT_ENCODING))
}
Ok(b"WinAnsiEncoding") => Ok(Encoding::OneByteEncoding(&encodings::WIN_ANSI_ENCODING)),
Ok(b"PDFDocEncoding") => {
log::warn!("PDFDocEncoding is not a valid character encoding for a font");
Ok(Encoding::OneByteEncoding(&encodings::PDF_DOC_ENCODING))
}
Ok(b"Identity-H") | Ok(b"Identity-V") => {
let stream = self.get_deref(b"ToUnicode", doc)?.as_stream()?;
self.get_encoding_from_to_unicode_cmap(stream)
}
Ok(name) => Ok(Encoding::SimpleEncoding(name)),
Err(err) => {
warn!(
"Could not parse the encoding, error: {err:#?}\nFont: {self:#?}\nTrying to retrieve ToUnicode."
);
let stream = self
.get_deref(b"ToUnicode", doc)
.and_then(Object::as_stream);
if let Ok(stream) = stream {
return self.get_encoding_from_to_unicode_cmap(stream);
}
warn!("Using standard encoding as a fallback!");
Ok(Encoding::OneByteEncoding(&encodings::STANDARD_ENCODING))
}
}
}
fn get_encoding_from_to_unicode_cmap(&'_ self, stream: &Stream) -> Result<Encoding<'_>> {
let content = stream.get_plain_content()?;
let cmap = ToUnicodeCMap::parse(content)?;
Ok(Encoding::UnicodeMapEncoding(cmap))
}
pub fn extend(&mut self, other: &Dictionary) {
let keep_both_objects = |new_dict: &mut IndexMap<Vec<u8>, Object>,
key: &Vec<u8>,
value: &Object,
old_value: Object| {
let mut final_array;
match value {
Object::Array(array) => {
final_array = Vec::with_capacity(array.len() + 1);
final_array.push(old_value);
final_array.extend(array.to_owned());
}
_ => {
final_array = vec![value.to_owned(), old_value];
}
}
new_dict.insert(key.to_owned(), Object::Array(final_array));
};
let mut new_dict = std::mem::take(&mut self.0);
new_dict.reserve_exact(other.0.len());
for (key, value) in other.0.iter() {
if let Some(old_value) = new_dict.get(key) {
let old_value = old_value.to_owned();
match (&old_value, value) {
(Object::Dictionary(old_dict), Object::Dictionary(dict)) => {
let mut replaced_dict = old_dict.to_owned();
replaced_dict.extend(dict);
new_dict.insert(key.to_owned(), Object::Dictionary(replaced_dict));
}
(Object::Array(old_array), Object::Array(array)) => {
let mut replaced_array = old_array.to_owned();
replaced_array.extend(array.to_owned());
new_dict.insert(key.to_owned(), Object::Array(replaced_array));
}
(Object::Integer(old_id), Object::Integer(id)) => {
let array = vec![Object::Integer(*old_id), Object::Integer(*id)];
new_dict.insert(key.to_owned(), Object::Array(array));
}
(Object::Real(old_id), Object::Real(id)) => {
let array = vec![Object::Real(*old_id), Object::Real(*id)];
new_dict.insert(key.to_owned(), Object::Array(array));
}
(Object::String(old_ids, old_format), Object::String(ids, format)) => {
let array = vec![
Object::String(old_ids.to_owned(), old_format.to_owned()),
Object::String(ids.to_owned(), format.to_owned()),
];
new_dict.insert(key.to_owned(), Object::Array(array));
}
(Object::Reference(old_object_id), Object::Reference(object_id)) => {
let array = vec![
Object::Reference(*old_object_id),
Object::Reference(*object_id),
];
new_dict.insert(key.to_owned(), Object::Array(array));
}
(Object::Null, _)
| (Object::Boolean(_), _)
| (Object::Name(_), _)
| (Object::Stream(_), _) => {
new_dict.insert(key.to_owned(), old_value);
}
(_, _) => keep_both_objects(&mut new_dict, key, value, old_value),
}
} else {
new_dict.insert(key.to_owned(), value.to_owned());
}
}
self.0 = new_dict;
}
pub fn as_hashmap(&self) -> &IndexMap<Vec<u8>, Object> {
&self.0
}
pub fn as_hashmap_mut(&mut self) -> &mut IndexMap<Vec<u8>, Object> {
&mut self.0
}
}
#[macro_export]
macro_rules! dictionary {
() => {
$crate::Dictionary::new()
};
($( $key: expr => $value: expr ),+ ,) => {
dictionary!( $($key => $value),+ )
};
($( $key: expr => $value: expr ),*) => {{
let mut dict = $crate::Dictionary::new();
$(
dict.set($key, $value);
)*
dict
}}
}
impl fmt::Debug for Dictionary {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let entries = self
.into_iter()
.map(|(key, value)| format!("/{} {:?}", String::from_utf8_lossy(key), value))
.collect::<Vec<String>>();
write!(f, "<<{}>>", entries.concat())
}
}
impl IntoIterator for Dictionary {
type Item = (Vec<u8>, Object);
type IntoIter = indexmap::map::IntoIter<Vec<u8>, Object>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'a> IntoIterator for &'a Dictionary {
type Item = (&'a Vec<u8>, &'a Object);
type IntoIter = indexmap::map::Iter<'a, Vec<u8>, Object>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
impl<'a> IntoIterator for &'a mut Dictionary {
type Item = (&'a Vec<u8>, &'a mut Object);
type IntoIter = indexmap::map::IterMut<'a, Vec<u8>, Object>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter_mut()
}
}
use std::iter::FromIterator;
impl<K: Into<Vec<u8>>> FromIterator<(K, Object)> for Dictionary {
fn from_iter<I: IntoIterator<Item = (K, Object)>>(iter: I) -> Self {
let mut dict = Dictionary::new();
for (k, v) in iter {
dict.set(k, v);
}
dict
}
}
impl Stream {
pub fn new(mut dict: Dictionary, content: Vec<u8>) -> Stream {
dict.set("Length", content.len() as i64);
Stream {
dict,
content,
allows_compression: true,
start_position: None,
}
}
pub fn with_position(dict: Dictionary, position: usize) -> Stream {
Stream {
dict,
content: vec![],
allows_compression: true,
start_position: Some(position),
}
}
#[inline]
pub fn with_compression(mut self, allows_compression: bool) -> Stream {
self.allows_compression = allows_compression;
self
}
pub fn filters(&self) -> Result<Vec<&[u8]>> {
let filter = self.dict.get(b"Filter")?;
if let Ok(name) = filter.as_name() {
Ok(vec![name])
} else if let Ok(names) = filter.as_array() {
names.iter().map(Object::as_name).collect()
} else {
Err(Error::ObjectType {
expected: "Name or Array",
found: filter.enum_variant(),
})
}
}
pub fn set_content(&mut self, content: Vec<u8>) {
self.content = content;
self.dict.set("Length", self.content.len() as i64);
}
pub fn set_plain_content(&mut self, content: Vec<u8>) {
self.dict.remove(b"DecodeParms");
self.dict.remove(b"Filter");
self.dict.set("Length", content.len() as i64);
self.content = content;
}
pub fn get_plain_content(&self) -> Result<Vec<u8>> {
match self.filters() {
Ok(vec) if !vec.is_empty() => self.decompressed_content(),
_ => Ok(self.content.clone()),
}
}
pub fn compress(&mut self) -> Result<()> {
self.compress_with_level(9)
}
pub fn compress_with_level(&mut self, level: u32) -> Result<()> {
use flate2::Compression;
use flate2::write::ZlibEncoder;
use std::io::prelude::*;
if self.dict.get(b"Filter").is_err() {
let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(level));
encoder.write_all(self.content.as_slice())?;
let compressed = encoder.finish()?;
if compressed.len() + 19 < self.content.len() {
self.dict.set("Filter", "FlateDecode");
self.set_content(compressed);
}
}
Ok(())
}
pub fn decompressed_content(&self) -> Result<Vec<u8>> {
let params = self.dict.get(b"DecodeParms").and_then(Object::as_dict).ok();
let filters = self.filters()?;
let mut input = self.content.as_slice();
let mut output = vec![];
for filter in filters {
output = match filter {
b"FlateDecode" => Self::decompress_zlib(input, params)?,
b"LZWDecode" => Self::decompress_lzw(input, params)?,
b"ASCII85Decode" => Self::decode_ascii85(input)?,
b"ASCIIHexDecode" | b"AHx" => Self::decode_ascii_hex(input)?,
b"RunLengthDecode" | b"RL" => Self::decode_run_length(input)?,
#[cfg(feature = "embed_image")]
b"CCITTFaxDecode" | b"CCF" => Self::decode_ccitt_fax(input, params)?,
#[cfg(feature = "embed_image")]
b"JBIG2Decode" => Self::decode_jbig2(input)?,
#[cfg(feature = "embed_image")]
b"JPXDecode" => Self::decode_jpx(input)?,
b"DCTDecode" | b"DCT" => input.to_vec(), _ => return Err(Error::Unimplemented("decompression algorithms")),
};
input = &output;
}
Ok(output)
}
fn decompress_lzw(input: &[u8], params: Option<&Dictionary>) -> Result<Vec<u8>> {
use weezl::{BitOrder, decode::Decoder};
const MIN_BITS: u8 = 9;
let early_change = params
.and_then(|p| p.get(b"EarlyChange").ok())
.and_then(|p| Object::as_i64(p).ok())
.map(|v| v != 0)
.unwrap_or(true);
let mut decoder = if early_change {
Decoder::with_tiff_size_switch(BitOrder::Msb, MIN_BITS - 1)
} else {
Decoder::new(BitOrder::Msb, MIN_BITS - 1)
};
let output = Self::decompress_lzw_loop(input, &mut decoder)?;
Self::decompress_predictor(output, params)
}
fn decompress_lzw_loop(input: &[u8], decoder: &mut weezl::decode::Decoder) -> Result<Vec<u8>> {
let mut output = vec![];
let result = decoder.into_stream(&mut output).decode_all(input);
if let Err(err) = result.status {
warn!("{err}");
}
if output.len() > MAX_DECOMPRESSED_BYTES {
return Err(Error::StreamTooLarge {
limit: MAX_DECOMPRESSED_BYTES,
});
}
Ok(output)
}
fn decompress_zlib(input: &[u8], params: Option<&Dictionary>) -> Result<Vec<u8>> {
use flate2::read::ZlibDecoder;
use std::io::prelude::*;
let mut output = Vec::with_capacity(input.len().min(4096) * 2);
let decoder = ZlibDecoder::new(input);
if !input.is_empty() {
decoder
.take(MAX_DECOMPRESSED_BYTES as u64 + 1)
.read_to_end(&mut output)
.unwrap_or_else(|err| {
warn!("{err}");
0
});
if output.len() > MAX_DECOMPRESSED_BYTES {
return Err(Error::StreamTooLarge {
limit: MAX_DECOMPRESSED_BYTES,
});
}
}
Self::decompress_predictor(output, params)
}
fn decode_ascii85(input: &[u8]) -> Result<Vec<u8>> {
let mut output = vec![];
let mut buffer: u32 = 0;
let mut count = 0;
let input_no_eod = if input.len() >= 2 && &input[input.len() - 2..] == b"~>" {
&input[..input.len() - 2]
} else {
log::warn!("ASCII85 stream is missing its EOD marker");
input
};
for &ch in input_no_eod {
if ch == b'z' {
if count != 0 {
return Err(DecompressError::Ascii85(
"z character is not allowed in the middle of a group",
)
.into());
}
output.extend_from_slice(&[0, 0, 0, 0]);
continue;
}
if ch.is_ascii_whitespace() {
continue;
}
if !(b'!'..=b'u').contains(&ch) {
break;
}
buffer = buffer
.checked_mul(85)
.ok_or(DecompressError::Ascii85("multiplication overflow"))?;
buffer += (ch - b'!') as u32;
count += 1;
if count == 5 {
output.extend_from_slice(&buffer.to_be_bytes());
buffer = 0;
count = 0;
}
}
if count > 0 {
for _ in count..5 {
buffer = buffer
.checked_mul(85)
.ok_or(DecompressError::Ascii85("multiplication overflow"))?;
buffer += 84;
}
let bytes = buffer.to_be_bytes();
output.extend_from_slice(&bytes[..count - 1]);
}
Ok(output)
}
fn decode_ascii_hex(input: &[u8]) -> Result<Vec<u8>> {
let mut output = Vec::with_capacity(input.len() / 2);
let mut hi: Option<u8> = None;
for &ch in input {
if ch == b'>' {
break; }
if ch.is_ascii_whitespace() {
continue;
}
let nibble = match ch {
b'0'..=b'9' => ch - b'0',
b'A'..=b'F' => ch - b'A' + 10,
b'a'..=b'f' => ch - b'a' + 10,
_ => return Err(DecompressError::AsciiHex("invalid hex digit").into()),
};
match hi {
None => hi = Some(nibble),
Some(h) => {
output.push((h << 4) | nibble);
hi = None;
}
}
}
if let Some(h) = hi {
output.push(h << 4);
}
Ok(output)
}
fn decode_run_length(input: &[u8]) -> Result<Vec<u8>> {
let mut output = Vec::new();
let mut i = 0;
while i < input.len() {
let length = input[i];
i += 1;
match length {
128 => break, 0..=127 => {
let count = length as usize + 1;
let end = (i + count).min(input.len());
output.extend_from_slice(&input[i..end]);
i = end;
}
_ => {
if i >= input.len() {
break;
}
let count = 257 - length as usize;
let byte = input[i];
i += 1;
output.extend(std::iter::repeat_n(byte, count));
}
}
}
Ok(output)
}
#[cfg(feature = "embed_image")]
fn decode_ccitt_fax(input: &[u8], params: Option<&Dictionary>) -> Result<Vec<u8>> {
let k = params
.and_then(|p| p.get(b"K").ok())
.and_then(|o| Object::as_i64(o).ok())
.unwrap_or(0);
let columns = params
.and_then(|p| p.get(b"Columns").ok())
.and_then(|o| Object::as_i64(o).ok())
.unwrap_or(1728) as u32;
let rows = params
.and_then(|p| p.get(b"Rows").ok())
.and_then(|o| Object::as_i64(o).ok())
.unwrap_or(0) as u32;
let end_of_block = params
.and_then(|p| p.get(b"EndOfBlock").ok())
.and_then(|o| Object::as_bool(o).ok())
.unwrap_or(true);
let end_of_line = params
.and_then(|p| p.get(b"EndOfLine").ok())
.and_then(|o| Object::as_bool(o).ok())
.unwrap_or(false);
let byte_align = params
.and_then(|p| p.get(b"EncodedByteAlign").ok())
.and_then(|o| Object::as_bool(o).ok())
.unwrap_or(false);
let black_is_1 = params
.and_then(|p| p.get(b"BlackIs1").ok())
.and_then(|o| Object::as_bool(o).ok())
.unwrap_or(false);
let encoding = if k < 0 {
hayro_ccitt::EncodingMode::Group4
} else if k == 0 {
hayro_ccitt::EncodingMode::Group3_1D
} else {
hayro_ccitt::EncodingMode::Group3_2D { k: k as u32 }
};
let settings = hayro_ccitt::DecodeSettings {
columns,
rows,
end_of_block,
end_of_line,
rows_are_byte_aligned: byte_align,
encoding,
invert_black: black_is_1,
};
struct ByteDecoder {
output: Vec<u8>,
buffer: u8,
bit_count: u8,
}
impl ByteDecoder {
fn flush(&mut self) {
if self.bit_count > 0 {
self.output.push(self.buffer << (8 - self.bit_count));
self.buffer = 0;
self.bit_count = 0;
}
}
}
impl hayro_ccitt::Decoder for ByteDecoder {
fn push_pixel(&mut self, white: bool) {
self.buffer = (self.buffer << 1) | u8::from(white);
self.bit_count += 1;
if self.bit_count == 8 {
self.output.push(self.buffer);
self.buffer = 0;
self.bit_count = 0;
}
}
fn push_pixel_chunk(&mut self, white: bool, chunk_count: u32) {
let byte = if white { 0xFF } else { 0x00 };
self.output
.extend(std::iter::repeat_n(byte, chunk_count as usize));
}
fn next_line(&mut self) {
self.flush();
}
}
let mut decoder = ByteDecoder {
output: Vec::new(),
buffer: 0,
bit_count: 0,
};
match hayro_ccitt::decode(input, &mut decoder, &settings) {
Ok(_) => Ok(decoder.output),
Err(_) if !decoder.output.is_empty() => {
Ok(decoder.output)
}
Err(_) => Err(Error::Unimplemented("CCITTFaxDecode failed")),
}
}
#[cfg(feature = "embed_image")]
fn decode_jbig2(input: &[u8]) -> Result<Vec<u8>> {
let image = hayro_jbig2::decode_embedded(input, None)
.map_err(|_| Error::Unimplemented("JBIG2Decode failed"))?;
let row_bytes = (image.width as usize).div_ceil(8);
let mut packed = vec![0u8; row_bytes * image.height as usize];
struct InvertDecoder<'a> {
data: &'a mut [u8],
pos: usize,
buffer: u8,
bit_count: u8,
}
impl hayro_jbig2::Decoder for InvertDecoder<'_> {
fn push_pixel(&mut self, black: bool) {
self.buffer = (self.buffer << 1) | u8::from(!black);
self.bit_count += 1;
if self.bit_count == 8 {
if self.pos < self.data.len() {
self.data[self.pos] = self.buffer;
}
self.pos += 1;
self.buffer = 0;
self.bit_count = 0;
}
}
fn push_pixel_chunk(&mut self, black: bool, chunk_count: u32) {
let byte = if black { 0x00 } else { 0xFF };
let end = (self.pos + chunk_count as usize).min(self.data.len());
for b in &mut self.data[self.pos..end] {
*b = byte;
}
self.pos = end;
}
fn next_line(&mut self) {
if self.bit_count > 0 {
if self.pos < self.data.len() {
self.data[self.pos] = self.buffer << (8 - self.bit_count);
}
self.pos += 1;
self.buffer = 0;
self.bit_count = 0;
}
}
}
let mut decoder = InvertDecoder {
data: &mut packed,
pos: 0,
buffer: 0,
bit_count: 0,
};
image.decode(&mut decoder);
Ok(packed)
}
#[cfg(feature = "embed_image")]
fn decode_jpx(input: &[u8]) -> Result<Vec<u8>> {
let settings = hayro_jpeg2000::DecodeSettings {
resolve_palette_indices: false,
strict: false,
target_resolution: None,
};
let image = hayro_jpeg2000::Image::new(input, &settings)
.map_err(|_| Error::Unimplemented("JPXDecode failed"))?;
image
.decode()
.map_err(|_| Error::Unimplemented("JPXDecode failed"))
}
fn decompress_predictor(mut data: Vec<u8>, params: Option<&Dictionary>) -> Result<Vec<u8>> {
use crate::filters::png;
if let Some(params) = params {
let predictor = params
.get(b"Predictor")
.and_then(Object::as_i64)
.unwrap_or(1);
if (10..=15).contains(&predictor) {
let pixels_per_row = max(
1,
params.get(b"Columns").and_then(Object::as_i64).unwrap_or(1),
) as usize;
let colors = max(
1,
params.get(b"Colors").and_then(Object::as_i64).unwrap_or(1),
) as usize;
let bits = max(
8,
params
.get(b"BitsPerComponent")
.and_then(Object::as_i64)
.unwrap_or(8),
) as usize;
let bytes_per_pixel = colors * bits / 8;
data = png::decode_frame(data.as_slice(), bytes_per_pixel, pixels_per_row)?;
}
Ok(data)
} else {
Ok(data)
}
}
pub fn decompress(&mut self) -> Result<()> {
let data = self.decompressed_content()?;
self.dict.remove(b"DecodeParms");
self.dict.remove(b"Filter");
self.set_content(data);
Ok(())
}
pub fn is_compressed(&self) -> bool {
self.dict.get(b"Filter").is_ok()
}
}
#[cfg(test)]
mod test {
use crate::{Error, error::DecompressError};
use super::{MAX_DECOMPRESSED_BYTES, Stream};
#[test]
fn test_decode_ascii85() {
let input = r#"9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,O<
DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKYi(
DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIal(
DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G>u
D.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>"#;
let expected = "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.";
let output = Stream::decode_ascii85(input.as_bytes()).unwrap();
println!("{}", String::from_utf8(output.clone()).unwrap());
assert_eq!(&output, expected.as_bytes());
}
#[test]
fn test_decode_ascii85_overflow() {
let input = b"uuuuu~>";
let output = Stream::decode_ascii85(input);
assert!(matches!(
output,
Err(Error::Decompress(DecompressError::Ascii85(_)))
));
}
#[test]
fn test_decode_ascii_hex() {
let input = b"48656C6C6F>";
let output = Stream::decode_ascii_hex(input).unwrap();
assert_eq!(output, b"Hello");
}
#[test]
fn test_decode_ascii_hex_lowercase() {
let input = b"48656c6c6f>";
let output = Stream::decode_ascii_hex(input).unwrap();
assert_eq!(output, b"Hello");
}
#[test]
fn test_decode_ascii_hex_whitespace() {
let input = b"48 65 6C 6C 6F>";
let output = Stream::decode_ascii_hex(input).unwrap();
assert_eq!(output, b"Hello");
}
#[test]
fn test_decode_ascii_hex_odd_nibble() {
let input = b"ABC>";
let output = Stream::decode_ascii_hex(input).unwrap();
assert_eq!(output, vec![0xAB, 0xC0]);
}
#[test]
fn test_decode_run_length() {
let input = vec![4, 10, 11, 12, 13, 14, 253, 3, 128];
let output = Stream::decode_run_length(&input).unwrap();
assert_eq!(output, vec![10, 11, 12, 13, 14, 3, 3, 3, 3]);
}
#[test]
fn test_decode_run_length_eod() {
let input = vec![0, 42, 128, 0, 99];
let output = Stream::decode_run_length(&input).unwrap();
assert_eq!(output, vec![42]);
}
#[test]
fn test_decode_run_length_repeat() {
let input = vec![255, 0xAA, 128];
let output = Stream::decode_run_length(&input).unwrap();
assert_eq!(output, vec![0xAA, 0xAA]);
}
#[test]
fn decompress_zlib_within_limit_succeeds() {
use flate2::Compression;
use flate2::write::ZlibEncoder;
use std::io::Write;
let plaintext = vec![0u8; 100];
let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
encoder.write_all(&plaintext).unwrap();
let compressed = encoder.finish().unwrap();
let result = Stream::decompress_zlib(&compressed, None);
assert!(
result.is_ok(),
"small stream should decompress successfully"
);
assert_eq!(result.unwrap(), plaintext);
}
#[test]
fn stream_too_large_error_has_correct_limit() {
let err = crate::Error::StreamTooLarge {
limit: MAX_DECOMPRESSED_BYTES,
};
assert!(
matches!(err, crate::Error::StreamTooLarge { limit } if limit == MAX_DECOMPRESSED_BYTES),
"StreamTooLarge must carry MAX_DECOMPRESSED_BYTES as the limit"
);
}
}