use crate::error::{Error, Result};
use crate::extractors::ccitt_bilevel;
use crate::geometry::Rect;
use crate::object::ObjectRef;
use std::cmp::min;
use std::path::Path;
/// An image taken from a PDF, together with optional placement information.
///
/// The pixel payload lives in `data`; the remaining fields describe how that
/// payload is laid out and, when known, where the image sits on the page.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct PdfImage {
/// Image width (read from `/Width` when built by `extract_image_from_xobject`).
width: u32,
/// Image height (read from `/Height` when built by `extract_image_from_xobject`).
height: u32,
/// Color space the image samples are expressed in.
color_space: ColorSpace,
/// Number of bits used for each color component of a sample.
bits_per_component: u8,
/// Encoded or raw image bytes; omitted from serialized output when empty.
#[serde(skip_serializing_if = "ImageData::is_empty")]
data: ImageData,
/// Bounding rectangle of the image; `None` until one is supplied.
bbox: Option<Rect>,
/// Rotation of the image in degrees; constructors without spatial data use 0.
rotation_degrees: i32,
/// Six-element matrix; constructors without spatial data use
/// `[1, 0, 0, 1, 0, 0]`.
/// NOTE(review): presumably the PDF `[a b c d e f]` transformation matrix — confirm.
matrix: [f32; 6],
/// Optional CCITT decoding parameters; never serialized.
#[serde(skip)]
ccitt_params: Option<crate::decoders::CcittParams>,
}
impl PdfImage {
pub fn new(
width: u32,
height: u32,
color_space: ColorSpace,
bits_per_component: u8,
data: ImageData,
) -> Self {
Self {
width,
height,
color_space,
bits_per_component,
data,
bbox: None,
rotation_degrees: 0,
matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
ccitt_params: None,
}
}
pub fn with_spatial(
width: u32,
height: u32,
color_space: ColorSpace,
bits_per_component: u8,
data: ImageData,
bbox: Rect,
rotation: i32,
matrix: [f32; 6],
) -> Self {
Self {
width,
height,
color_space,
bits_per_component,
data,
bbox: Some(bbox),
rotation_degrees: rotation,
matrix,
ccitt_params: None,
}
}
pub fn with_bbox(
width: u32,
height: u32,
color_space: ColorSpace,
bits_per_component: u8,
data: ImageData,
bbox: Rect,
) -> Self {
Self::with_spatial(
width,
height,
color_space,
bits_per_component,
data,
bbox,
0,
[1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
)
}
pub fn with_ccitt_params(
width: u32,
height: u32,
color_space: ColorSpace,
bits_per_component: u8,
data: ImageData,
ccitt_params: crate::decoders::CcittParams,
) -> Self {
Self {
width,
height,
color_space,
bits_per_component,
data,
bbox: None,
rotation_degrees: 0,
matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
ccitt_params: Some(ccitt_params),
}
}
pub fn width(&self) -> u32 {
self.width
}
pub fn height(&self) -> u32 {
self.height
}
pub fn color_space(&self) -> &ColorSpace {
&self.color_space
}
pub fn bits_per_component(&self) -> u8 {
self.bits_per_component
}
pub fn data(&self) -> &ImageData {
&self.data
}
pub fn bbox(&self) -> Option<&Rect> {
self.bbox.as_ref()
}
pub fn set_bbox(&mut self, bbox: Rect) {
self.bbox = Some(bbox);
}
pub fn rotation_degrees(&self) -> i32 {
self.rotation_degrees
}
pub fn set_rotation_degrees(&mut self, rotation: i32) {
self.rotation_degrees = rotation;
}
pub fn matrix(&self) -> [f32; 6] {
self.matrix
}
pub fn set_matrix(&mut self, matrix: [f32; 6]) {
self.matrix = matrix;
}
pub fn set_ccitt_params(&mut self, params: crate::decoders::CcittParams) {
self.ccitt_params = Some(params);
}
pub fn ccitt_params(&self) -> Option<&crate::decoders::CcittParams> {
self.ccitt_params.as_ref()
}
pub fn save_as_png(&self, path: impl AsRef<Path>) -> Result<()> {
match &self.data {
ImageData::Jpeg(jpeg_data) => save_jpeg_as_png(jpeg_data, path),
ImageData::Raw { pixels, format } => {
save_raw_as_png(pixels, self.width, self.height, *format, path)
},
}
}
pub fn save_as_jpeg(&self, path: impl AsRef<Path>) -> Result<()> {
match &self.data {
ImageData::Jpeg(jpeg_data) => std::fs::write(path, jpeg_data).map_err(Error::from),
ImageData::Raw { pixels, format } => {
save_raw_as_jpeg(pixels, self.width, self.height, *format, path)
},
}
}
pub fn to_png_bytes(&self) -> Result<Vec<u8>> {
use image::codecs::png::{CompressionType, FilterType, PngEncoder};
use image::ImageEncoder;
use std::io::Cursor;
let mut buffer = Cursor::new(Vec::new());
let encoder =
PngEncoder::new_with_quality(&mut buffer, CompressionType::Fast, FilterType::NoFilter);
match &self.data {
ImageData::Raw { pixels, format } => {
let expected_gray = (self.width * self.height) as usize;
let expected_rgb = expected_gray * 3;
if *format == PixelFormat::Grayscale
&& matches!(self.color_space, ColorSpace::DeviceGray | ColorSpace::CalGray)
&& pixels.len() == expected_gray
{
encoder
.write_image(pixels, self.width, self.height, image::ColorType::L8)
.map_err(|e| Error::Encode(format!("Failed to encode PNG: {}", e)))?;
} else if *format == PixelFormat::RGB && pixels.len() == expected_rgb {
encoder
.write_image(pixels, self.width, self.height, image::ColorType::Rgb8)
.map_err(|e| Error::Encode(format!("Failed to encode PNG: {}", e)))?;
} else {
let dynamic_image = self.to_dynamic_image()?;
let rgb = dynamic_image.to_rgb8();
encoder
.write_image(rgb.as_raw(), self.width, self.height, image::ColorType::Rgb8)
.map_err(|e| Error::Encode(format!("Failed to encode PNG: {}", e)))?;
}
},
ImageData::Jpeg(_) => {
let dynamic_image = self.to_dynamic_image()?;
let rgb = dynamic_image.to_rgb8();
encoder
.write_image(rgb.as_raw(), self.width, self.height, image::ColorType::Rgb8)
.map_err(|e| Error::Encode(format!("Failed to encode PNG: {}", e)))?;
},
}
Ok(buffer.into_inner())
}
pub fn to_base64_data_uri(&self) -> Result<String> {
use base64::{engine::general_purpose::STANDARD, Engine};
match &self.data {
ImageData::Jpeg(jpeg_data) => {
let base64_str = STANDARD.encode(jpeg_data);
Ok(format!("data:image/jpeg;base64,{}", base64_str))
},
ImageData::Raw { .. } => {
let png_bytes = self.to_png_bytes()?;
let base64_str = STANDARD.encode(&png_bytes);
Ok(format!("data:image/png;base64,{}", base64_str))
},
}
}
pub fn to_dynamic_image(&self) -> Result<image::DynamicImage> {
match &self.data {
ImageData::Jpeg(jpeg_data) => {
log::debug!(
"Decoding JPEG data ({} bytes), starts with: {:02X?}",
jpeg_data.len(),
&jpeg_data[..min(jpeg_data.len(), 16)]
);
image::load_from_memory(jpeg_data)
.map_err(|e| Error::Decode(format!("Failed to decode JPEG: {}", e)))
},
ImageData::Raw { pixels, format } => {
if self.bits_per_component == 1
&& matches!(self.color_space, ColorSpace::DeviceGray)
{
let params =
self.ccitt_params
.clone()
.unwrap_or_else(|| crate::decoders::CcittParams {
columns: self.width,
rows: Some(self.height),
..Default::default()
});
let decompressed = ccitt_bilevel::decompress_ccitt(pixels, ¶ms)?;
let grayscale =
ccitt_bilevel::bilevel_to_grayscale(&decompressed, self.width, self.height);
image::ImageBuffer::<image::Luma<u8>, Vec<u8>>::from_raw(
self.width,
self.height,
grayscale,
)
.ok_or_else(|| Error::Decode("Invalid image dimensions".to_string()))
.map(image::DynamicImage::ImageLuma8)
} else {
match (format, self.color_space) {
(PixelFormat::RGB, ColorSpace::DeviceRGB) => {
image::ImageBuffer::<image::Rgb<u8>, Vec<u8>>::from_raw(
self.width,
self.height,
pixels.clone(),
)
.ok_or_else(|| Error::Decode("Invalid image dimensions".to_string()))
.map(image::DynamicImage::ImageRgb8)
},
(PixelFormat::Grayscale, ColorSpace::DeviceGray) => {
image::ImageBuffer::<image::Luma<u8>, Vec<u8>>::from_raw(
self.width,
self.height,
pixels.clone(),
)
.ok_or_else(|| Error::Decode("Invalid image dimensions".to_string()))
.map(image::DynamicImage::ImageLuma8)
},
_ => {
let rgb_pixels = match format {
PixelFormat::Grayscale => {
pixels.iter().flat_map(|&g| vec![g, g, g]).collect()
},
PixelFormat::CMYK => cmyk_to_rgb(pixels),
PixelFormat::RGB => pixels.clone(),
};
image::ImageBuffer::<image::Rgb<u8>, Vec<u8>>::from_raw(
self.width,
self.height,
rgb_pixels,
)
.ok_or_else(|| Error::Decode("Invalid image dimensions".to_string()))
.map(image::DynamicImage::ImageRgb8)
},
}
}
},
}
}
}
/// Payload of a [`PdfImage`]: either a JPEG byte stream or a raw pixel buffer.
///
/// Serialized untagged, i.e. without a wrapper naming the variant.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
#[serde(untagged)]
pub enum ImageData {
/// JPEG-encoded bytes, kept in encoded form.
Jpeg(Vec<u8>),
/// Raw bytes together with the pixel layout they are stored in.
Raw {
/// The pixel (or still-compressed bilevel) bytes.
pixels: Vec<u8>,
/// Layout of `pixels`.
format: PixelFormat,
},
}
impl ImageData {
    /// Returns `true` when the underlying byte buffer holds no data,
    /// regardless of which variant is stored.
    pub fn is_empty(&self) -> bool {
        let bytes = match self {
            ImageData::Raw { pixels, .. } => pixels,
            ImageData::Jpeg(encoded) => encoded,
        };
        bytes.is_empty()
    }
}
/// Color space families recognised by `parse_color_space`.
///
/// Variant names mirror the PDF color space names they are parsed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum ColorSpace {
/// `DeviceRGB` (3 components).
DeviceRGB,
/// `DeviceGray` (1 component).
DeviceGray,
/// `DeviceCMYK` (4 components).
DeviceCMYK,
/// `Indexed` (1 component per sample).
Indexed,
/// `CalGray` (1 component).
CalGray,
/// `CalRGB` (3 components).
CalRGB,
/// `Lab` (3 components).
Lab,
/// `ICCBased`; the payload is the number of components.
ICCBased(usize),
/// `Separation` (1 component).
Separation,
/// `DeviceN`; `components()` reports a fixed 4 for this variant.
DeviceN,
/// `Pattern` (reported as 0 components).
Pattern,
}
impl ColorSpace {
    /// Number of color components per sample for this color space.
    ///
    /// `ICCBased` reports the count it carries; `DeviceN` is reported as a
    /// fixed 4 and `Pattern` as 0.
    pub fn components(&self) -> usize {
        match *self {
            ColorSpace::Pattern => 0,
            ColorSpace::DeviceGray
            | ColorSpace::Indexed
            | ColorSpace::CalGray
            | ColorSpace::Separation => 1,
            ColorSpace::DeviceRGB | ColorSpace::CalRGB | ColorSpace::Lab => 3,
            ColorSpace::DeviceCMYK | ColorSpace::DeviceN => 4,
            ColorSpace::ICCBased(count) => count,
        }
    }
}
/// Byte layout of a raw pixel buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
// Variant names are kept as upper-case acronyms, so the naming lint is silenced.
#[allow(clippy::upper_case_acronyms)]
pub enum PixelFormat {
/// Three bytes per pixel.
RGB,
/// One byte per pixel.
Grayscale,
/// Four bytes per pixel.
CMYK,
}
impl PixelFormat {
    /// Number of bytes one pixel occupies in this layout.
    pub fn bytes_per_pixel(&self) -> usize {
        match *self {
            PixelFormat::CMYK => 4,
            PixelFormat::RGB => 3,
            PixelFormat::Grayscale => 1,
        }
    }
}
/// Chooses the raw pixel layout used for data in the given color space.
///
/// Single-component spaces map to grayscale, four-component spaces to CMYK,
/// and everything else (including `Indexed`, `Pattern` and ICC profiles with
/// an unexpected component count) to RGB.
fn color_space_to_pixel_format(color_space: &ColorSpace) -> PixelFormat {
    match *color_space {
        ColorSpace::DeviceGray
        | ColorSpace::CalGray
        | ColorSpace::Separation
        | ColorSpace::ICCBased(1) => PixelFormat::Grayscale,

        ColorSpace::DeviceCMYK | ColorSpace::DeviceN | ColorSpace::ICCBased(4) => {
            PixelFormat::CMYK
        },

        ColorSpace::DeviceRGB
        | ColorSpace::Indexed
        | ColorSpace::CalRGB
        | ColorSpace::Lab
        | ColorSpace::Pattern
        | ColorSpace::ICCBased(_) => PixelFormat::RGB,
    }
}
pub fn parse_color_space(obj: &crate::object::Object) -> Result<ColorSpace> {
use crate::object::Object;
match obj {
Object::Name(name) => match name.as_str() {
"DeviceRGB" => Ok(ColorSpace::DeviceRGB),
"DeviceGray" => Ok(ColorSpace::DeviceGray),
"DeviceCMYK" => Ok(ColorSpace::DeviceCMYK),
"Pattern" => Ok(ColorSpace::Pattern),
other => Err(Error::Image(format!("Unsupported color space: {}", other))),
},
Object::Array(arr) if !arr.is_empty() => {
if let Some(name) = arr[0].as_name() {
match name {
"Indexed" => Ok(ColorSpace::Indexed),
"CalGray" => Ok(ColorSpace::CalGray),
"CalRGB" => Ok(ColorSpace::CalRGB),
"Lab" => Ok(ColorSpace::Lab),
"ICCBased" => {
let num_components = if arr.len() > 1 {
if let Some(stream_dict) = arr[1].as_dict() {
stream_dict
.get("N")
.and_then(|obj| match obj {
Object::Integer(n) => Some(*n as usize),
_ => None,
})
.unwrap_or(3)
} else {
3
}
} else {
3
};
Ok(ColorSpace::ICCBased(num_components))
},
"Separation" => Ok(ColorSpace::Separation),
"DeviceN" => Ok(ColorSpace::DeviceN),
"Pattern" => Ok(ColorSpace::Pattern),
other => Err(Error::Image(format!("Unsupported array color space: {}", other))),
}
} else {
Err(Error::Image("Color space array must start with a name".to_string()))
}
},
_ => Err(Error::Image(format!("Invalid color space object: {:?}", obj))),
}
}
pub fn extract_image_from_xobject(
mut doc: Option<&mut crate::document::PdfDocument>,
xobject: &crate::object::Object,
obj_ref: Option<ObjectRef>,
color_space_map: Option<&std::collections::HashMap<String, crate::object::Object>>,
) -> Result<PdfImage> {
use crate::object::Object;
let dict = xobject
.as_dict()
.ok_or_else(|| Error::Image("XObject is not a stream".to_string()))?;
let subtype = dict
.get("Subtype")
.and_then(|obj| obj.as_name())
.ok_or_else(|| Error::Image("XObject missing /Subtype".to_string()))?;
if subtype != "Image" {
return Err(Error::Image(format!("XObject subtype is not Image: {}", subtype)));
}
let width = dict
.get("Width")
.and_then(|obj| obj.as_integer())
.ok_or_else(|| Error::Image("Image missing /Width".to_string()))? as u32;
let height = dict
.get("Height")
.and_then(|obj| obj.as_integer())
.ok_or_else(|| Error::Image("Image missing /Height".to_string()))? as u32;
let bits_per_component = dict
.get("BitsPerComponent")
.and_then(|obj| obj.as_integer())
.unwrap_or(8) as u8;
let color_space_obj = dict
.get("ColorSpace")
.ok_or_else(|| Error::Image("Image missing /ColorSpace".to_string()))?;
let resolved_color_space = if let Some(ref mut d) = doc {
let res = if let Some(obj_ref) = color_space_obj.as_reference() {
d.load_object(obj_ref)?
} else {
color_space_obj.clone()
};
if let Object::Name(ref name) = res {
if let Some(map) = color_space_map {
map.get(name).cloned().unwrap_or(res)
} else {
res
}
} else {
res
}
} else {
color_space_obj.clone()
};
let color_space = parse_color_space(&resolved_color_space)?;
let filter_names = if let Some(filter_obj) = dict.get("Filter") {
match filter_obj {
Object::Name(name) => vec![name.clone()],
Object::Array(filters) => filters
.iter()
.filter_map(|f| f.as_name().map(String::from))
.collect(),
_ => vec![],
}
} else {
vec![]
};
let has_dct = filter_names.iter().any(|name| name == "DCTDecode");
let is_jpeg_only = has_dct && filter_names.len() == 1;
let is_jpeg_chain = has_dct && filter_names.len() > 1;
let mut ccitt_params_override: Option<crate::decoders::CcittParams> = None;
if (filter_names.contains(&"JBIG2Decode".to_string())
|| filter_names.contains(&"Jbig2Decode".to_string()))
&& bits_per_component == 1
{
let mut ccitt_params =
crate::object::extract_ccitt_params_with_width(dict.get("DecodeParms"), Some(width));
if let Some(ref mut params) = ccitt_params {
if params.rows.is_none() {
params.rows = Some(height);
}
ccitt_params_override = ccitt_params;
}
}
let data = if is_jpeg_only || is_jpeg_chain {
let decoded = if let (Some(d), Some(ref_id)) = (doc.as_mut(), obj_ref) {
d.decode_stream_with_encryption(xobject, ref_id)?
} else {
xobject.decode_stream_data()?
};
ImageData::Jpeg(decoded)
} else if ccitt_params_override.is_some() {
match xobject {
Object::Stream { data, .. } => ImageData::Raw {
pixels: data.to_vec(),
format: PixelFormat::Grayscale,
},
_ => return Err(Error::Image("XObject is not a stream".to_string())),
}
} else {
let decoded_data = if let (Some(d), Some(ref_id)) = (doc.as_mut(), obj_ref) {
d.decode_stream_with_encryption(xobject, ref_id)?
} else {
xobject.decode_stream_data()?
};
let pixel_format = color_space_to_pixel_format(&color_space);
ImageData::Raw {
pixels: decoded_data,
format: pixel_format,
}
};
let mut image = PdfImage::new(width, height, color_space, bits_per_component, data);
if let Some(ccitt_params) = ccitt_params_override {
image.set_ccitt_params(ccitt_params);
} else if bits_per_component == 1 && image.color_space == ColorSpace::DeviceGray {
if let Some(mut ccitt_params) =
crate::object::extract_ccitt_params_with_width(dict.get("DecodeParms"), Some(width))
{
if ccitt_params.rows.is_none() {
ccitt_params.rows = Some(height);
}
image.set_ccitt_params(ccitt_params);
}
}
Ok(image)
}
/// Converts interleaved 8-bit CMYK samples into interleaved 8-bit RGB.
///
/// Each channel is computed as `(1 - ink) * (1 - k) * 255` on values scaled
/// to `0.0..=1.0`, then truncated to `u8`. Input is consumed four bytes at a
/// time; trailing bytes that do not form a complete CMYK sample are ignored.
pub fn cmyk_to_rgb(cmyk: &[u8]) -> Vec<u8> {
    let mut rgb = Vec::with_capacity((cmyk.len() / 4) * 3);
    rgb.extend(cmyk.chunks_exact(4).flat_map(|px| {
        // Share of light left after the black ink is applied.
        let black = 1.0 - px[3] as f32 / 255.0;
        px[..3]
            .iter()
            .map(move |&ink| ((1.0 - ink as f32 / 255.0) * black * 255.0) as u8)
    }));
    rgb
}
fn save_jpeg_as_png(jpeg_data: &[u8], path: impl AsRef<Path>) -> Result<()> {
use image::ImageFormat;
let img = image::load_from_memory_with_format(jpeg_data, ImageFormat::Jpeg)
.map_err(|e| Error::Image(format!("Failed to decode JPEG: {}", e)))?;
img.save_with_format(path, ImageFormat::Png)
.map_err(|e| Error::Image(format!("Failed to save PNG: {}", e)))
}
fn save_raw_as_png(
pixels: &[u8],
width: u32,
height: u32,
format: PixelFormat,
path: impl AsRef<Path>,
) -> Result<()> {
use image::{ImageBuffer, ImageFormat, Luma, Rgb};
match format {
PixelFormat::RGB => {
let img = ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, pixels.to_vec())
.ok_or_else(|| Error::Image("Invalid RGB image dimensions".to_string()))?;
img.save_with_format(path, ImageFormat::Png)
.map_err(|e| Error::Image(format!("Failed to save PNG: {}", e)))
},
PixelFormat::Grayscale => {
let img = ImageBuffer::<Luma<u8>, _>::from_raw(width, height, pixels.to_vec())
.ok_or_else(|| Error::Image("Invalid grayscale image dimensions".to_string()))?;
img.save_with_format(path, ImageFormat::Png)
.map_err(|e| Error::Image(format!("Failed to save PNG: {}", e)))
},
PixelFormat::CMYK => {
let rgb = cmyk_to_rgb(pixels);
let img = ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, rgb)
.ok_or_else(|| Error::Image("Invalid CMYK image dimensions".to_string()))?;
img.save_with_format(path, ImageFormat::Png)
.map_err(|e| Error::Image(format!("Failed to save PNG: {}", e)))
},
}
}
fn save_raw_as_jpeg(
pixels: &[u8],
width: u32,
height: u32,
format: PixelFormat,
path: impl AsRef<Path>,
) -> Result<()> {
use image::{ImageBuffer, ImageFormat, Luma, Rgb};
match format {
PixelFormat::RGB => {
let img = ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, pixels.to_vec())
.ok_or_else(|| Error::Image("Invalid RGB image dimensions".to_string()))?;
img.save_with_format(path, ImageFormat::Jpeg)
.map_err(|e| Error::Image(format!("Failed to save JPEG: {}", e)))
},
PixelFormat::Grayscale => {
let img = ImageBuffer::<Luma<u8>, _>::from_raw(width, height, pixels.to_vec())
.ok_or_else(|| Error::Image("Invalid grayscale image dimensions".to_string()))?;
img.save_with_format(path, ImageFormat::Jpeg)
.map_err(|e| Error::Image(format!("Failed to save JPEG: {}", e)))
},
PixelFormat::CMYK => {
let rgb = cmyk_to_rgb(pixels);
let img = ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, rgb)
.ok_or_else(|| Error::Image("Invalid CMYK image dimensions".to_string()))?;
img.save_with_format(path, ImageFormat::Jpeg)
.map_err(|e| Error::Image(format!("Failed to save JPEG: {}", e)))
},
}
}
/// Rewrites abbreviated inline-image dictionary keys to their long names.
///
/// Recognised abbreviations are `W`, `H`, `CS`, `BPC`, `F`, `DP`, `IM`, `I`,
/// `D` and `EF`; every other key (including ones already in long form, such
/// as `Intent`) is carried over unchanged. Values are moved across untouched.
/// If two input keys expand to the same name, whichever is visited last wins.
pub fn expand_inline_image_dict(
    dict: std::collections::HashMap<String, crate::object::Object>,
) -> std::collections::HashMap<String, crate::object::Object> {
    dict.into_iter()
        .map(|(key, value)| {
            let long_name = match key.as_str() {
                "W" => Some("Width"),
                "H" => Some("Height"),
                "CS" => Some("ColorSpace"),
                "BPC" => Some("BitsPerComponent"),
                "F" => Some("Filter"),
                "DP" => Some("DecodeParms"),
                "IM" => Some("ImageMask"),
                "I" => Some("Interpolate"),
                "D" => Some("Decode"),
                "EF" => Some("EFontFile"),
                _ => None,
            };
            let expanded_key = match long_name {
                Some(name) => name.to_string(),
                None => key,
            };
            (expanded_key, value)
        })
        .collect()
}