//! JPEG decoding for PDF image streams.
use crate::object::Dict;
use crate::object::dict::keys::COLOR_TRANSFORM;
use crate::object::stream::{FilterResult, ImageColorSpace, ImageData, ImageDecodeParams};
use alloc::borrow::Cow;
use core::num::NonZeroU32;
use zune_jpeg::zune_core::bytestream::ZCursor;
use zune_jpeg::zune_core::colorspace::ColorSpace;
use zune_jpeg::zune_core::colorspace::ColorSpace::CMYK;
use zune_jpeg::zune_core::options::DecoderOptions;
/// Decodes a JPEG-compressed PDF image stream into raw 8-bit samples.
///
/// * `data` – the raw JPEG byte stream.
/// * `params` – the filter parameter dictionary; only the `COLOR_TRANSFORM`
///   entry is read.
/// * `image_params` – width, height and (optional) component count as declared
///   by the PDF image dictionary.
///
/// Returns a [`FilterResult`] holding the decoded bytes together with an
/// [`ImageData`] describing them (8 bits per component, no alpha, dimensions
/// as reported by the decoder).
///
/// Returns `None` when the PDF-declared dimensions exceed `u16::MAX`, when the
/// JPEG header cannot be located or patched, when header parsing or decoding
/// fails, or when the decoder cannot report its colour space, frame info or
/// dimensions.
pub(crate) fn decode(
    data: &[u8],
    params: Dict<'_>,
    image_params: &ImageDecodeParams,
) -> Option<FilterResult> {
    // The dimension patch below writes the PDF dimensions into 16-bit fields,
    // so anything larger than u16::MAX is rejected up front.
    if image_params.width > u16::MAX as u32 || image_params.height > u16::MAX as u32 {
        return None;
    }

    // Some PDFs have weird JPEGs where the JPEG metadata is completely wrong
    // (for example indicating that one of the dimensions is u16::MAX), but the
    // metadata in the PDF image dictionary is correct. Therefore, we first
    // validate the JPEG metadata and patch the data if any of the dimensions
    // are too large (if they are too small, they will just be padded later on).
    let data = maybe_patch_jpeg_dimensions(data, image_params)?;

    let options = DecoderOptions::default()
        .set_max_width(u16::MAX as usize)
        .set_max_height(u16::MAX as usize);
    let mut decoder = zune_jpeg::JpegDecoder::new_with_options(ZCursor::new(&*data), options);
    decoder.decode_headers().ok()?;

    let color_transform = params.get::<u8>(COLOR_TRANSFORM);
    let input_color_space = decoder.input_colorspace()?;

    // Track whether the JPEG decoder will apply a YCbCr→RGB colour transform.
    // When it does, the decoded bytes are already in sRGB colorimetry (BT.601
    // matrix) and any PDF ICCBased profile must NOT be applied on top.
    let mut jpeg_ycbcr_to_rgb = false;

    // A PDF-declared component count other than 1, 3 or 4 forces a generic
    // multi-band output; otherwise the output space follows the JPEG's own
    // input colour space.
    let mut out_colorspace = if let Some(num_components) = image_params.num_components
        && !matches!(num_components, 1 | 3 | 4)
    {
        ColorSpace::MultiBand(NonZeroU32::new(num_components as u32)?)
    } else {
        match input_color_space {
            ColorSpace::YCbCr => {
                // A missing ColorTransform entry is treated the same as 1
                // (convert to RGB); any other value keeps the raw YCbCr samples.
                if color_transform.is_none_or(|c| c == 1) {
                    jpeg_ycbcr_to_rgb = true;
                    ColorSpace::RGB
                } else {
                    ColorSpace::YCbCr
                }
            }
            ColorSpace::RGB | ColorSpace::RGBA => ColorSpace::RGB,
            ColorSpace::Luma | ColorSpace::LumaA => ColorSpace::Luma,
            // TODO: Find test case with color transform on cmyk
            CMYK => CMYK,
            ColorSpace::YCCK => ColorSpace::YCCK,
            _ => ColorSpace::RGB,
        }
    };

    // In case image had APP14 marker, we might have to override the colorspace.
    if input_color_space == CMYK && decoder.info()?.components == 3 {
        out_colorspace = ColorSpace::RGB;
    }

    // NOTE(review): this starts from a fresh default option set, so the
    // max width/height configured above are presumably not carried over to the
    // decode call — confirm that this is intended.
    decoder.set_options(DecoderOptions::default().jpeg_set_out_colorspace(out_colorspace));
    let mut decoded = decoder.decode().ok()?;

    if out_colorspace == ColorSpace::YCCK {
        // YCCK JPEG: channels 0-2 are YCbCr, channel 3 is K (JPEG-inverted: 255=no ink).
        // Convert YCbCr to CMY (ink-density form: 0=no ink, 255=full ink), then invert K
        // so all four channels are in PDF DeviceCMYK convention (0=no ink, 255=full ink).
        // Downstream DeviceCMYK ICC profile expects this convention.
        //
        // Conversion formula adapted from:
        // <https://github.com/mozilla/pdf.js/blob/69595a29192b7704733404a42a2ebb537601117b/src/core/jpg.js#L1331>
        // Values are clamped to [0, 255] to avoid wrapping artefacts from the as-cast.
        //
        // `chunks_exact_mut` guarantees every chunk has all four channels, so
        // a trailing partial pixel can never cause an out-of-bounds index.
        for c in decoded.chunks_exact_mut(4) {
            let y = c[0] as f32;
            let cb = c[1] as f32;
            let cr = c[2] as f32;
            c[0] = (434.456 - y - 1.402 * cr).clamp(0.0, 255.0) as u8;
            c[1] = (119.541 - y + 0.344 * cb + 0.714 * cr).clamp(0.0, 255.0) as u8;
            c[2] = (481.816 - y - 1.772 * cb).clamp(0.0, 255.0) as u8;
            // Invert K: JPEG stores K as 255=no ink; DeviceCMYK expects 0=no ink.
            c[3] = 255 - c[3];
        }
    }

    // Plain JPEG CMYK output: JPEG encodes CMYK as inverted ink density
    // (255 = no ink, 0 = full ink), i.e. the complement of the PDF DeviceCMYK
    // convention (0 = no ink). Invert all channels so that the data can be fed
    // directly into the DeviceCMYK ICC profile which expects standard
    // DeviceCMYK values. YCCK output does not take this branch; it was already
    // brought into DeviceCMYK convention by the conversion above.
    if out_colorspace == CMYK {
        for byte in &mut decoded {
            *byte = 255 - *byte;
        }
    }

    let (w, h) = decoder.dimensions()?;
    let width = w as u32;
    let height = h as u32;

    let image_data = ImageData {
        alpha: None,
        color_space: match out_colorspace {
            ColorSpace::RGB | ColorSpace::YCbCr => {
                if jpeg_ycbcr_to_rgb {
                    // Signal that the bytes are already in sRGB (BT.601 matrix
                    // applied by the JPEG decoder). x_object.rs uses this to
                    // skip any PDF ICCBased/CalRGB conversion.
                    Some(ImageColorSpace::RgbFromYCbCr)
                } else {
                    Some(ImageColorSpace::Rgb)
                }
            }
            ColorSpace::Luma => Some(ImageColorSpace::Gray),
            ColorSpace::YCCK | CMYK => Some(ImageColorSpace::Cmyk),
            // Multi-band (and any other) output has no colour space reported here.
            ColorSpace::MultiBand(_) => None,
            _ => None,
        },
        bits_per_component: 8,
        width,
        height,
    };

    Some(FilterResult {
        data: decoded,
        image_data: Some(image_data),
    })
}
/// Rewrites the frame dimensions in a JPEG's SOF header when the JPEG claims a
/// larger pixel area than the PDF image dictionary declares.
///
/// * `data` – the raw JPEG byte stream.
/// * `image_params` – supplies the PDF-declared `width` and `height`.
///
/// Returns the input borrowed and unchanged when the JPEG area does not exceed
/// the PDF area, otherwise an owned copy whose SOF height/width fields have
/// been overwritten with the PDF dimensions.
///
/// Returns `None` if no SOF marker is found, if the SOF header is truncated,
/// or if a patch is required but a PDF dimension does not fit into the
/// 16-bit SOF fields.
fn maybe_patch_jpeg_dimensions<'a>(
    data: &'a [u8],
    image_params: &ImageDecodeParams,
) -> Option<Cow<'a, [u8]>> {
    let sof_offset = find_sof_marker(data)?;
    // Relative to the marker's 0xFF byte: marker (2 bytes), segment length (2),
    // sample precision (1), then height (2) and width (2), both big-endian.
    let height_offset = sof_offset + 5;
    let width_offset = sof_offset + 7;

    let read_u16 = |at: usize| -> Option<u16> {
        Some(u16::from_be_bytes([*data.get(at)?, *data.get(at + 1)?]))
    };
    let jpeg_height = read_u16(height_offset)?;
    let jpeg_width = read_u16(width_offset)?;

    // Compare the areas in u64 so that neither product can overflow, even if
    // this function is ever called with unvalidated PDF dimensions.
    let jpeg_area = u64::from(jpeg_width) * u64::from(jpeg_height);
    let pdf_area = u64::from(image_params.width) * u64::from(image_params.height);
    if jpeg_area <= pdf_area {
        return Some(Cow::Borrowed(data));
    }

    // Refuse to patch rather than silently truncating a dimension that does
    // not fit the 16-bit header fields.
    let target_w = u16::try_from(image_params.width).ok()?.to_be_bytes();
    let target_h = u16::try_from(image_params.height).ok()?.to_be_bytes();

    // Both two-byte ranges were read successfully above, so they are in bounds.
    let mut patched = data.to_vec();
    patched[height_offset..height_offset + 2].copy_from_slice(&target_h);
    patched[width_offset..width_offset + 2].copy_from_slice(&target_w);
    Some(Cow::Owned(patched))
}
/// Scans a JPEG byte stream for the first Start-Of-Frame (SOF) marker.
///
/// Returns the index of the marker's leading `0xFF` byte, or `None` if the
/// data ends (or a segment's length field is truncated) before a SOF marker
/// is found.
///
/// Segments that carry a length field are skipped as a whole, so marker-like
/// byte pairs inside their payload are not mistaken for a SOF.
fn find_sof_marker(data: &[u8]) -> Option<usize> {
    let mut i = 0;
    while i + 1 < data.len() {
        if data[i] != 0xFF {
            i += 1;
            continue;
        }
        let marker = data[i + 1];
        match marker {
            // All SOF markers carry dimensions: SOF0–SOF15, excluding
            // 0xC4 (DHT), 0xC8 (JPG), 0xCC (DAC) which are not frame markers.
            0xC0..=0xCF if !matches!(marker, 0xC4 | 0xC8 | 0xCC) => return Some(i),
            // Fill byte (0xFF followed by 0xFF): re-examine the second 0xFF as
            // a potential marker prefix.
            0xFF => i += 1,
            // Standalone markers without a length field or payload:
            // stuffed byte (0x00), TEM (0x01), RST0–RST7 (0xD0–0xD7),
            // SOI (0xD8) and EOI (0xD9).
            0x00 | 0x01 | 0xD0..=0xD9 => i += 2,
            // All other markers have a 2-byte big-endian length field that
            // counts itself but not the marker — skip over the whole segment.
            _ => {
                let seg_len = u16::from_be_bytes([*data.get(i + 2)?, *data.get(i + 3)?]) as usize;
                i += 2 + seg_len;
            }
        }
    }
    None
}